Kernels
github-actions[bot] committed
Commit 46020a2 · 1 Parent(s): ad23c2a

Add built binary [skip-build]

This view is limited to 50 files because it contains too many changes. See raw diff.

Files changed (50)
  1. build/torch210-cxx11-cu126-x86_64-linux/__init__.py +53 -0
  2. build/{torch28-cxx11-cu128-x86_64-linux/activation/_activation_496308d_dirty.abi3.so → torch210-cxx11-cu126-x86_64-linux/_activation_18b7543_dirty.abi3.so} +2 -2
  3. build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/_ops.py +3 -3
  4. build/torch210-cxx11-cu126-x86_64-linux/activation/__init__.py +26 -0
  5. build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/fused_add_rms_norm_meta.py +7 -2
  6. build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/layers.py +0 -0
  7. build/torch210-cxx11-cu126-x86_64-linux/metadata.json +3 -0
  8. build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/parallel_style.py +0 -0
  9. build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/poly_norm.py +0 -0
  10. build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/rms_norm.py +0 -0
  11. build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/rms_norm_meta.py +7 -2
  12. build/torch210-cxx11-cu128-x86_64-linux/__init__.py +53 -0
  13. build/{torch28-cxx11-cu129-x86_64-linux/activation/_activation_496308d_dirty.abi3.so → torch210-cxx11-cu128-x86_64-linux/_activation_18b7543_dirty.abi3.so} +2 -2
  14. build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/_ops.py +3 -3
  15. build/torch210-cxx11-cu128-x86_64-linux/activation/__init__.py +26 -0
  16. build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/fused_add_rms_norm_meta.py +7 -2
  17. build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/layers.py +0 -0
  18. build/torch210-cxx11-cu128-x86_64-linux/metadata.json +3 -0
  19. build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/parallel_style.py +0 -0
  20. build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/poly_norm.py +0 -0
  21. build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/rms_norm.py +0 -0
  22. build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/rms_norm_meta.py +7 -2
  23. build/torch210-cxx11-cu130-x86_64-linux/__init__.py +53 -0
  24. build/torch210-cxx11-cu130-x86_64-linux/_activation_18b7543_dirty.abi3.so +3 -0
  25. build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/_ops.py +3 -3
  26. build/torch210-cxx11-cu130-x86_64-linux/activation/__init__.py +26 -0
  27. build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/fused_add_rms_norm_meta.py +7 -2
  28. build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/layers.py +0 -0
  29. build/torch210-cxx11-cu130-x86_64-linux/metadata.json +3 -0
  30. build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/parallel_style.py +0 -0
  31. build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/poly_norm.py +0 -0
  32. build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/rms_norm.py +0 -0
  33. build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/rms_norm_meta.py +7 -2
  34. build/torch210-cxx11-rocm70-x86_64-linux/__init__.py +53 -0
  35. build/{torch28-cxx11-cu126-x86_64-linux/activation/_activation_496308d_dirty.abi3.so → torch210-cxx11-rocm70-x86_64-linux/_activation_18b7543_dirty.abi3.so} +2 -2
  36. build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/_ops.py +3 -3
  37. build/torch210-cxx11-rocm70-x86_64-linux/activation/__init__.py +26 -0
  38. build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/fused_add_rms_norm_meta.py +7 -2
  39. build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/layers.py +0 -0
  40. build/torch210-cxx11-rocm70-x86_64-linux/metadata.json +3 -0
  41. build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/parallel_style.py +0 -0
  42. build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/poly_norm.py +0 -0
  43. build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/rms_norm.py +0 -0
  44. build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/rms_norm_meta.py +7 -2
  45. build/torch210-cxx11-rocm71-x86_64-linux/__init__.py +53 -0
  46. build/{torch28-cxx11-rocm64-x86_64-linux/activation/_activation_496308d_dirty.abi3.so → torch210-cxx11-rocm71-x86_64-linux/_activation_18b7543_dirty.abi3.so} +2 -2
  47. build/torch210-cxx11-rocm71-x86_64-linux/_ops.py +9 -0
  48. build/torch210-cxx11-rocm71-x86_64-linux/activation/__init__.py +26 -0
  49. build/torch210-cxx11-rocm71-x86_64-linux/fused_add_rms_norm_meta.py +217 -0
  50. build/{torch28-cxx11-rocm64-x86_64-linux/activation → torch210-cxx11-rocm71-x86_64-linux}/layers.py +0 -0
build/torch210-cxx11-cu126-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,53 @@
+import torch
+
+from . import layers, parallel_style
+from ._ops import ops
+from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
+from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def fused_mul_poly_norm(
+    x: torch.Tensor,
+    mul: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+def fused_add_rms_norm(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "fused_mul_poly_norm",
+    "rms_norm",
+    "fused_add_rms_norm",
+    "layers",
+    "parallel_style",
+    "ops",
+]
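Note: the functions added above are thin front-ends over the autograd Functions defined in poly_norm.py and rms_norm.py. A minimal usage sketch, assuming the built variant is importable as a module named `activation` and a CUDA device is available (the import name, shapes, and dtypes are illustrative, not part of this commit):

import torch
import activation  # hypothetical import name for the built variant

x = torch.randn(8, 4096, device="cuda", dtype=torch.float16)
residual = torch.randn_like(x)
weight = torch.ones(4096, device="cuda", dtype=torch.float16)

y = activation.rms_norm(x, weight, eps=1e-6)                        # RMSNorm over the last dim
out = activation.fused_add_rms_norm(x, residual, weight, eps=1e-6)  # fused residual add + RMSNorm

(The exact return convention is defined by the Function.apply implementations; the "-> None" annotations are carried over from the source as-is.)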
build/{torch28-cxx11-cu128-x86_64-linux/activation/_activation_496308d_dirty.abi3.so → torch210-cxx11-cu126-x86_64-linux/_activation_18b7543_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e914bbcc41277358db1417cd12162ec7d4d0d63f597c137301f1c57df2a896a
-size 10122960
+oid sha256:39a7e25002120a73ea83ac813276c0518086fae2236f528dadf96bac4876a270
+size 10775296
build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/_ops.py RENAMED
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_496308d_dirty
-ops = torch.ops._activation_496308d_dirty
+from . import _activation_18b7543_dirty
+ops = torch.ops._activation_18b7543_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_496308d_dirty::{op_name}"
+    return f"_activation_18b7543_dirty::{op_name}"
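The only change in _ops.py is re-pointing the op namespace at the newly built shared object. A sketch of how these helpers are consumed elsewhere in the package, again assuming the variant is importable as `activation` (the op name below is illustrative; the real op schemas live in the .so and are not shown in this diff):

from activation._ops import ops, add_op_namespace_prefix

# Compiled custom ops are reached through the versioned torch.ops namespace:
# ops.some_op(...) resolves to torch.ops._activation_18b7543_dirty.some_op(...)

qualified = add_op_namespace_prefix("some_op")
# -> "_activation_18b7543_dirty::some_op", e.g. for registering meta/fake implementations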
build/torch210-cxx11-cu126-x86_64-linux/activation/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
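This activation/__init__.py is a compatibility shim: it executes the variant's top-level __init__.py under a unique, path-derived module name and copies its globals, so the same API is reachable both at the package root and under the older activation sub-module path. Illustrative only, with a hypothetical package name (the actual module name depends on how the build directory is placed on sys.path):

# variant_pkg/__init__.py defines rms_norm, fused_add_rms_norm, ...
from variant_pkg.activation import rms_norm, fused_add_rms_norm  # shim re-exports the same names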
build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/fused_add_rms_norm_meta.py RENAMED
@@ -7,8 +7,13 @@ from torch.distributed.tensor._op_schema import (OpSchema, OpSpec, OpStrategy,
 from torch.distributed.tensor._ops._math_ops import (
     _infer_reduce_dims_map, _replicate_dims_start_at,
     map_placements_after_reduction)
-from torch.distributed.tensor._ops.utils import (generate_redistribute_costs,
-                                                 register_op_strategy)
+from torch.distributed.tensor._ops.utils import generate_redistribute_costs
+
+try:
+    from torch.distributed.tensor._ops.utils import register_op_strategy
+except ImportError:
+    # PyTorch 2.10+ moved register_op_strategy to a separate module
+    from torch.distributed.tensor._ops.registration import register_op_strategy
 from torch.distributed.tensor.placement_types import (Placement, Replicate,
                                                        Shard)
 
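The only source change in the renamed meta files is this import guard, which keeps a single copy of the file importable across PyTorch releases. The same pattern in isolation (module paths exactly as used in the diff):

try:
    # pre-2.10 location
    from torch.distributed.tensor._ops.utils import register_op_strategy
except ImportError:
    # PyTorch 2.10+ moved register_op_strategy to a separate module
    from torch.distributed.tensor._ops.registration import register_op_strategy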
build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/layers.py RENAMED
File without changes
build/torch210-cxx11-cu126-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,3 @@
+{
+  "python-depends": []
+}
build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/parallel_style.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/poly_norm.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/rms_norm.py RENAMED
File without changes
build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu126-x86_64-linux}/rms_norm_meta.py RENAMED
@@ -7,8 +7,13 @@ from torch.distributed.tensor._op_schema import (OpSchema, OpSpec, OpStrategy,
 from torch.distributed.tensor._ops._math_ops import (
     _infer_reduce_dims_map, _replicate_dims_start_at,
     map_placements_after_reduction)
-from torch.distributed.tensor._ops.utils import (generate_redistribute_costs,
-                                                 register_op_strategy)
+from torch.distributed.tensor._ops.utils import generate_redistribute_costs
+
+try:
+    from torch.distributed.tensor._ops.utils import register_op_strategy
+except ImportError:
+    # PyTorch 2.10+ moved register_op_strategy to a separate module
+    from torch.distributed.tensor._ops.registration import register_op_strategy
 from torch.distributed.tensor.placement_types import (Placement, Replicate,
                                                        Shard)
 
build/torch210-cxx11-cu128-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,53 @@
+import torch
+
+from . import layers, parallel_style
+from ._ops import ops
+from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
+from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def fused_mul_poly_norm(
+    x: torch.Tensor,
+    mul: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+def fused_add_rms_norm(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "fused_mul_poly_norm",
+    "rms_norm",
+    "fused_add_rms_norm",
+    "layers",
+    "parallel_style",
+    "ops",
+]
build/{torch28-cxx11-cu129-x86_64-linux/activation/_activation_496308d_dirty.abi3.so → torch210-cxx11-cu128-x86_64-linux/_activation_18b7543_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5de606ea13ce268356eeae89b662f2b62a09acd94fe1bd66abea832bdd6d7b9e
-size 10147000
+oid sha256:078853c2db399a227822ea0c8e70c2e13bad41bfa370657dd19aa2efb3b503e9
+size 15815392
build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/_ops.py RENAMED
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_496308d_dirty
-ops = torch.ops._activation_496308d_dirty
+from . import _activation_18b7543_dirty
+ops = torch.ops._activation_18b7543_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_496308d_dirty::{op_name}"
+    return f"_activation_18b7543_dirty::{op_name}"
build/torch210-cxx11-cu128-x86_64-linux/activation/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/fused_add_rms_norm_meta.py RENAMED
@@ -7,8 +7,13 @@ from torch.distributed.tensor._op_schema import (OpSchema, OpSpec, OpStrategy,
 from torch.distributed.tensor._ops._math_ops import (
     _infer_reduce_dims_map, _replicate_dims_start_at,
     map_placements_after_reduction)
-from torch.distributed.tensor._ops.utils import (generate_redistribute_costs,
-                                                 register_op_strategy)
+from torch.distributed.tensor._ops.utils import generate_redistribute_costs
+
+try:
+    from torch.distributed.tensor._ops.utils import register_op_strategy
+except ImportError:
+    # PyTorch 2.10+ moved register_op_strategy to a separate module
+    from torch.distributed.tensor._ops.registration import register_op_strategy
 from torch.distributed.tensor.placement_types import (Placement, Replicate,
                                                        Shard)
 
build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/layers.py RENAMED
File without changes
build/torch210-cxx11-cu128-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,3 @@
+{
+  "python-depends": []
+}
build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/parallel_style.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/poly_norm.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/rms_norm.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/activation → torch210-cxx11-cu128-x86_64-linux}/rms_norm_meta.py RENAMED
@@ -7,8 +7,13 @@ from torch.distributed.tensor._op_schema import (OpSchema, OpSpec, OpStrategy,
 from torch.distributed.tensor._ops._math_ops import (
     _infer_reduce_dims_map, _replicate_dims_start_at,
     map_placements_after_reduction)
-from torch.distributed.tensor._ops.utils import (generate_redistribute_costs,
-                                                 register_op_strategy)
+from torch.distributed.tensor._ops.utils import generate_redistribute_costs
+
+try:
+    from torch.distributed.tensor._ops.utils import register_op_strategy
+except ImportError:
+    # PyTorch 2.10+ moved register_op_strategy to a separate module
+    from torch.distributed.tensor._ops.registration import register_op_strategy
 from torch.distributed.tensor.placement_types import (Placement, Replicate,
                                                        Shard)
 
build/torch210-cxx11-cu130-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,53 @@
+import torch
+
+from . import layers, parallel_style
+from ._ops import ops
+from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
+from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def fused_mul_poly_norm(
+    x: torch.Tensor,
+    mul: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+def fused_add_rms_norm(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "fused_mul_poly_norm",
+    "rms_norm",
+    "fused_add_rms_norm",
+    "layers",
+    "parallel_style",
+    "ops",
+]
build/torch210-cxx11-cu130-x86_64-linux/_activation_18b7543_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59e2c13071e1807a6225c5ad7a4a7eb04d46b1f177ae6344d199a9e7f14daf92
+size 13520952
build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/_ops.py RENAMED
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_496308d_dirty
-ops = torch.ops._activation_496308d_dirty
+from . import _activation_18b7543_dirty
+ops = torch.ops._activation_18b7543_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_496308d_dirty::{op_name}"
+    return f"_activation_18b7543_dirty::{op_name}"
build/torch210-cxx11-cu130-x86_64-linux/activation/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/fused_add_rms_norm_meta.py RENAMED
@@ -7,8 +7,13 @@ from torch.distributed.tensor._op_schema import (OpSchema, OpSpec, OpStrategy,
 from torch.distributed.tensor._ops._math_ops import (
     _infer_reduce_dims_map, _replicate_dims_start_at,
     map_placements_after_reduction)
-from torch.distributed.tensor._ops.utils import (generate_redistribute_costs,
-                                                 register_op_strategy)
+from torch.distributed.tensor._ops.utils import generate_redistribute_costs
+
+try:
+    from torch.distributed.tensor._ops.utils import register_op_strategy
+except ImportError:
+    # PyTorch 2.10+ moved register_op_strategy to a separate module
+    from torch.distributed.tensor._ops.registration import register_op_strategy
 from torch.distributed.tensor.placement_types import (Placement, Replicate,
                                                        Shard)
 
build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/layers.py RENAMED
File without changes
build/torch210-cxx11-cu130-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,3 @@
+{
+  "python-depends": []
+}
build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/parallel_style.py RENAMED
File without changes
build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/poly_norm.py RENAMED
File without changes
build/{torch28-cxx11-cu129-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/rms_norm.py RENAMED
File without changes
build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-cu130-x86_64-linux}/rms_norm_meta.py RENAMED
@@ -7,8 +7,13 @@ from torch.distributed.tensor._op_schema import (OpSchema, OpSpec, OpStrategy,
 from torch.distributed.tensor._ops._math_ops import (
     _infer_reduce_dims_map, _replicate_dims_start_at,
     map_placements_after_reduction)
-from torch.distributed.tensor._ops.utils import (generate_redistribute_costs,
-                                                 register_op_strategy)
+from torch.distributed.tensor._ops.utils import generate_redistribute_costs
+
+try:
+    from torch.distributed.tensor._ops.utils import register_op_strategy
+except ImportError:
+    # PyTorch 2.10+ moved register_op_strategy to a separate module
+    from torch.distributed.tensor._ops.registration import register_op_strategy
 from torch.distributed.tensor.placement_types import (Placement, Replicate,
                                                        Shard)
 
build/torch210-cxx11-rocm70-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,53 @@
+import torch
+
+from . import layers, parallel_style
+from ._ops import ops
+from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
+from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def fused_mul_poly_norm(
+    x: torch.Tensor,
+    mul: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+def fused_add_rms_norm(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "fused_mul_poly_norm",
+    "rms_norm",
+    "fused_add_rms_norm",
+    "layers",
+    "parallel_style",
+    "ops",
+]
build/{torch28-cxx11-cu126-x86_64-linux/activation/_activation_496308d_dirty.abi3.so → torch210-cxx11-rocm70-x86_64-linux/_activation_18b7543_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6981f362de22807e15574e81e19b4816915a0e7a2863818ee6729641e7d28568
-size 6742080
+oid sha256:45ff2b71abb33d840d92116980e519786ed06f1e337d681d0e3301dba241ff63
+size 2919488
build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/_ops.py RENAMED
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_496308d_dirty
-ops = torch.ops._activation_496308d_dirty
+from . import _activation_18b7543_dirty
+ops = torch.ops._activation_18b7543_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_496308d_dirty::{op_name}"
+    return f"_activation_18b7543_dirty::{op_name}"
build/torch210-cxx11-rocm70-x86_64-linux/activation/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/fused_add_rms_norm_meta.py RENAMED
@@ -7,8 +7,13 @@ from torch.distributed.tensor._op_schema import (OpSchema, OpSpec, OpStrategy,
 from torch.distributed.tensor._ops._math_ops import (
     _infer_reduce_dims_map, _replicate_dims_start_at,
     map_placements_after_reduction)
-from torch.distributed.tensor._ops.utils import (generate_redistribute_costs,
-                                                 register_op_strategy)
+from torch.distributed.tensor._ops.utils import generate_redistribute_costs
+
+try:
+    from torch.distributed.tensor._ops.utils import register_op_strategy
+except ImportError:
+    # PyTorch 2.10+ moved register_op_strategy to a separate module
+    from torch.distributed.tensor._ops.registration import register_op_strategy
 from torch.distributed.tensor.placement_types import (Placement, Replicate,
                                                        Shard)
 
build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/layers.py RENAMED
File without changes
build/torch210-cxx11-rocm70-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,3 @@
+{
+  "python-depends": []
+}
build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/parallel_style.py RENAMED
File without changes
build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/poly_norm.py RENAMED
File without changes
build/{torch28-cxx11-rocm63-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/rms_norm.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/activation → torch210-cxx11-rocm70-x86_64-linux}/rms_norm_meta.py RENAMED
@@ -7,8 +7,13 @@ from torch.distributed.tensor._op_schema import (OpSchema, OpSpec, OpStrategy,
 from torch.distributed.tensor._ops._math_ops import (
     _infer_reduce_dims_map, _replicate_dims_start_at,
     map_placements_after_reduction)
-from torch.distributed.tensor._ops.utils import (generate_redistribute_costs,
-                                                 register_op_strategy)
+from torch.distributed.tensor._ops.utils import generate_redistribute_costs
+
+try:
+    from torch.distributed.tensor._ops.utils import register_op_strategy
+except ImportError:
+    # PyTorch 2.10+ moved register_op_strategy to a separate module
+    from torch.distributed.tensor._ops.registration import register_op_strategy
 from torch.distributed.tensor.placement_types import (Placement, Replicate,
                                                        Shard)
 
build/torch210-cxx11-rocm71-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,53 @@
+import torch
+
+from . import layers, parallel_style
+from ._ops import ops
+from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
+from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def fused_mul_poly_norm(
+    x: torch.Tensor,
+    mul: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+def fused_add_rms_norm(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "fused_mul_poly_norm",
+    "rms_norm",
+    "fused_add_rms_norm",
+    "layers",
+    "parallel_style",
+    "ops",
+]
build/{torch28-cxx11-rocm64-x86_64-linux/activation/_activation_496308d_dirty.abi3.so → torch210-cxx11-rocm71-x86_64-linux/_activation_18b7543_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cfc63ad98abf44fcb2a491d41fe99ea98be7a5c8f59dbacaedbaa32b3700313
-size 2794144
+oid sha256:af4db38e8d5ad56226f5a95a86c2b5fc726bd9d576d07df2f07d3f03c1b6b35b
+size 2911200
build/torch210-cxx11-rocm71-x86_64-linux/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _activation_18b7543_dirty
+ops = torch.ops._activation_18b7543_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_activation_18b7543_dirty::{op_name}"
build/torch210-cxx11-rocm71-x86_64-linux/activation/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/torch210-cxx11-rocm71-x86_64-linux/fused_add_rms_norm_meta.py ADDED
@@ -0,0 +1,217 @@
+from collections.abc import Sequence
+
+import torch
+from torch.distributed.tensor._dtensor_spec import DTensorSpec
+from torch.distributed.tensor._op_schema import (OpSchema, OpSpec, OpStrategy,
+                                                 RuntimeSchemaInfo)
+from torch.distributed.tensor._ops._math_ops import (
+    _infer_reduce_dims_map, _replicate_dims_start_at,
+    map_placements_after_reduction)
+from torch.distributed.tensor._ops.utils import generate_redistribute_costs
+
+try:
+    from torch.distributed.tensor._ops.utils import register_op_strategy
+except ImportError:
+    # PyTorch 2.10+ moved register_op_strategy to a separate module
+    from torch.distributed.tensor._ops.registration import register_op_strategy
+from torch.distributed.tensor.placement_types import (Placement, Replicate,
+                                                       Shard)
+
+from ._ops import ops
+
+
+def register_fused_add_rms_norm_meta():
+    """Dummy function to register the meta functions.
+    Registration happens at import time by the decorators below.
+    """
+    pass
+
+
+@register_op_strategy(ops.fused_add_rms_norm.default,
+                      schema_info=RuntimeSchemaInfo(1))
+def fused_add_rms_norm_strategy(op_schema: OpSchema) -> OpStrategy:
+    mesh = op_schema.get_mesh_from_args()
+
+    assert len(op_schema.args_schema) == 4
+    (
+        input_strategy,
+        residual_strategy,
+        weight_strategy,
+        _,  # eps
+    ) = op_schema.args_schema
+
+    assert isinstance(input_strategy, OpStrategy)
+    assert isinstance(residual_strategy, OpStrategy)
+    assert isinstance(weight_strategy, OpStrategy)
+
+    lengths = {
+        "input": len(input_strategy.strategies),
+        "residual": len(residual_strategy.strategies),
+        "weight": len(weight_strategy.strategies),
+    }
+    assert len(set(
+        lengths.values())) == 1, f"Strategy length mismatch: {lengths}"
+
+    last_dim = input_strategy.ndim - 1
+    strategy = OpStrategy([])
+    for input, residual, weight in zip(input_strategy.strategies,
+                                       residual_strategy.strategies,
+                                       weight_strategy.strategies):
+
+        input_src = input.output_spec
+        residual_src = residual.output_spec
+        weight_src = weight.output_spec
+
+        assert isinstance(input_src, DTensorSpec)
+        assert isinstance(residual_src, DTensorSpec)
+        assert isinstance(weight_src, DTensorSpec)
+
+        redistribute_costs = []
+
+        # Input can be sharded in any dim except the last dim.
+        input_tgt = DTensorSpec(
+            mesh=mesh,
+            placements=_replicate_dims_start_at(input_src.placements,
+                                                last_dim),
+            tensor_meta=input_src.tensor_meta,
+        )
+        redistribute_costs.append(
+            generate_redistribute_costs(input_strategy, input_tgt))
+
+        # Residual add must have the same sharding as input.
+        residual_tgt = input_tgt
+        redistribute_costs.append(
+            generate_redistribute_costs(residual_strategy, residual_tgt))
+
+        # Weight cannot be sharded, so always replicate it.
+        weight_tgt = DTensorSpec(
+            mesh=mesh,
+            placements=_replicate_dims_start_at(weight_src.placements),
+            tensor_meta=weight_src.tensor_meta,
+        )
+        redistribute_costs.append(
+            generate_redistribute_costs(weight_strategy, weight_tgt))
+
+        strategy.strategies.append(
+            OpSpec(
+                output_specs=[input_tgt, input_tgt],
+                input_specs=[input_tgt, residual_tgt, weight_tgt],
+                redistribute_cost=redistribute_costs,
+            ))
+    return strategy
+
+
+@register_op_strategy(ops.fused_add_rms_norm_backward.default,
+                      schema_info=RuntimeSchemaInfo(2))
+def fused_add_rms_norm_backward_strategy(op_schema: OpSchema) -> OpStrategy:
+    mesh = op_schema.get_mesh_from_args()
+
+    assert len(op_schema.args_schema) == 6
+    (
+        output_grad_strategy,
+        add_output_grad_strategy,
+        add_output_strategy,
+        weight_strategy,
+        _,  # eps
+        need_input_grad,  # need_input_grad
+    ) = op_schema.args_schema
+
+    assert isinstance(output_grad_strategy, OpStrategy)
+    assert isinstance(add_output_grad_strategy, OpStrategy)
+    assert isinstance(add_output_strategy, OpStrategy)
+    assert isinstance(weight_strategy, OpStrategy)
+
+    lengths = {
+        "output_grad": len(output_grad_strategy.strategies),
+        "add_output_grad": len(add_output_grad_strategy.strategies),
+        "add_output": len(add_output_strategy.strategies),
+        "weight": len(weight_strategy.strategies),
+    }
+    assert len(set(
+        lengths.values())) == 1, f"Strategy length mismatch: {lengths}"
+
+    zipped = zip(
+        output_grad_strategy.strategies,
+        add_output_grad_strategy.strategies,
+        add_output_strategy.strategies,
+        weight_strategy.strategies,
+    )
+
+    last_dim = output_grad_strategy.ndim - 1
+    outer_dims = list(range(last_dim))
+
+    strategy = OpStrategy([])
+    for output_grad, add_output_grad, add_output, weight in zipped:
+        output_grad_src = output_grad.output_spec
+        add_output_grad_src = add_output_grad.output_spec
+        add_output_src = add_output.output_spec
+        weight_src = weight.output_spec
+
+        assert isinstance(output_grad_src, DTensorSpec)
+        assert isinstance(add_output_grad_src, DTensorSpec)
+        assert isinstance(add_output_src, DTensorSpec)
+        assert isinstance(weight_src, DTensorSpec)
+
+        redistribute_costs = []
+
+        # output grad can be sharded in any dim except the last dim.
+        output_grad_tgt = DTensorSpec(
+            mesh=mesh,
+            placements=_replicate_dims_start_at(output_grad_src.placements,
+                                                last_dim),
+            tensor_meta=output_grad_src.tensor_meta,
+        )
+        redistribute_costs.append(
+            generate_redistribute_costs(output_grad_strategy, output_grad_tgt))
+
+        # add_output_grad must have the same sharding as output_grad.
+        add_output_grad_tgt = output_grad_tgt
+        redistribute_costs.append(
+            generate_redistribute_costs(add_output_grad_strategy,
+                                        add_output_grad_tgt))
+
+        # add_output must have the same sharding as output_grad.
+        add_output_tgt = output_grad_tgt
+        redistribute_costs.append(
+            generate_redistribute_costs(add_output_strategy, add_output_tgt))
+
+        # Weight cannot be sharded, so always replicate it.
+        weight_tgt = DTensorSpec(
+            mesh=mesh,
+            placements=_replicate_dims_start_at(weight_src.placements),
+            tensor_meta=weight_src.tensor_meta,
+        )
+        redistribute_costs.append(
+            generate_redistribute_costs(weight_strategy, weight_tgt))
+
+        # from torch/distributed/tensor/_ops/_math_ops.py::layer_norm_bwd_strategy()
+
+        # Weight cannot be sharded, so always replicate it.
+        # TODO: now d_weight spec follows input spec w/ a reduction.
+        # we may need to change to a pointwise rule over grad_out and
+        # input, then apply a reduction.
+        inp_placements = _replicate_dims_start_at(output_grad_src.placements,
+                                                  last_dim)
+        reduce_dims_map = _infer_reduce_dims_map(outer_dims,
+                                                 output_grad_src.ndim, False)
+        out_placements = map_placements_after_reduction(
+            inp_placements, outer_dims, reduce_dims_map, "sum")
+        weight_grad_tgt = DTensorSpec(
+            mesh=mesh,
+            placements=out_placements,
+            tensor_meta=weight_src.tensor_meta,
+        )
+
+        strategy.strategies.append(
+            OpSpec(
+                output_specs=[
+                    output_grad_tgt if need_input_grad else None,
+                    weight_grad_tgt
+                ],
+                input_specs=[
+                    output_grad_tgt, add_output_grad_tgt, add_output_tgt,
+                    weight_tgt
+                ],
+                redistribute_cost=redistribute_costs,
+            ))
+    return strategy
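The two decorated functions above register DTensor sharding strategies for the fused op and its backward: activations may be sharded on any dimension except the last, the residual must match the activation sharding, and the weight is always replicated (its gradient is reduced over the outer dims). A hedged sketch of exercising the forward strategy, assuming a distributed launch (e.g. torchrun), that the built variant is importable as `activation`, and standard DTensor APIs; none of this is part of the commit:

import os
import torch
from torch.distributed.device_mesh import init_device_mesh
from torch.distributed.tensor import distribute_tensor, Replicate, Shard

world = int(os.environ.get("WORLD_SIZE", "1"))
mesh = init_device_mesh("cuda", (world,))

x = torch.randn(16, 4096, device="cuda")
residual = torch.randn_like(x)
weight = torch.ones(4096, device="cuda")

# Shard activations on the batch dim; keep the weight replicated, matching
# the constraints encoded by fused_add_rms_norm_strategy above.
dx = distribute_tensor(x, mesh, [Shard(0)])
dres = distribute_tensor(residual, mesh, [Shard(0)])
dw = distribute_tensor(weight, mesh, [Replicate()])

# out = activation.fused_add_rms_norm(dx, dres, dw, eps=1e-6)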
build/{torch28-cxx11-rocm64-x86_64-linux/activation → torch210-cxx11-rocm71-x86_64-linux}/layers.py RENAMED
File without changes