danieldk HF Staff committed on
Commit
1ff8e83
·
verified ·
1 Parent(s): be90cf1

Build uploaded using `kernels`.

Browse files
Files changed (45) hide show
  1. build/torch210-cxx11-cu126-x86_64-linux/__init__.py +0 -63
  2. build/torch210-cxx11-cu126-x86_64-linux/_ops.py +0 -9
  3. build/torch210-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
  4. build/torch210-cxx11-cu126-x86_64-linux/metadata.json +0 -1
  5. build/torch210-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py +0 -26
  6. build/torch210-cxx11-cu128-x86_64-linux/__init__.py +0 -63
  7. build/torch210-cxx11-cu128-x86_64-linux/_ops.py +0 -9
  8. build/torch210-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
  9. build/torch210-cxx11-cu128-x86_64-linux/metadata.json +0 -1
  10. build/torch210-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py +0 -26
  11. build/torch210-cxx11-cu130-x86_64-linux/__init__.py +0 -63
  12. build/torch210-cxx11-cu130-x86_64-linux/_ops.py +0 -9
  13. build/torch210-cxx11-cu130-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
  14. build/torch210-cxx11-cu130-x86_64-linux/metadata.json +0 -1
  15. build/torch210-cxx11-cu130-x86_64-linux/tinygrad_rms/__init__.py +0 -26
  16. build/torch28-cxx11-cu126-x86_64-linux/__init__.py +0 -63
  17. build/torch28-cxx11-cu126-x86_64-linux/_ops.py +0 -9
  18. build/torch28-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
  19. build/torch28-cxx11-cu126-x86_64-linux/metadata.json +0 -1
  20. build/torch28-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py +0 -26
  21. build/torch28-cxx11-cu128-x86_64-linux/__init__.py +0 -63
  22. build/torch28-cxx11-cu128-x86_64-linux/_ops.py +0 -9
  23. build/torch28-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
  24. build/torch28-cxx11-cu128-x86_64-linux/metadata.json +0 -1
  25. build/torch28-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py +0 -26
  26. build/torch28-cxx11-cu129-x86_64-linux/__init__.py +0 -63
  27. build/torch28-cxx11-cu129-x86_64-linux/_ops.py +0 -9
  28. build/torch28-cxx11-cu129-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
  29. build/torch28-cxx11-cu129-x86_64-linux/metadata.json +0 -1
  30. build/torch28-cxx11-cu129-x86_64-linux/tinygrad_rms/__init__.py +0 -26
  31. build/torch29-cxx11-cu126-x86_64-linux/__init__.py +0 -63
  32. build/torch29-cxx11-cu126-x86_64-linux/_ops.py +0 -9
  33. build/torch29-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
  34. build/torch29-cxx11-cu126-x86_64-linux/metadata.json +0 -1
  35. build/torch29-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py +0 -26
  36. build/torch29-cxx11-cu128-x86_64-linux/__init__.py +0 -63
  37. build/torch29-cxx11-cu128-x86_64-linux/_ops.py +0 -9
  38. build/torch29-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
  39. build/torch29-cxx11-cu128-x86_64-linux/metadata.json +0 -1
  40. build/torch29-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py +0 -26
  41. build/torch29-cxx11-cu130-x86_64-linux/__init__.py +0 -63
  42. build/torch29-cxx11-cu130-x86_64-linux/_ops.py +0 -9
  43. build/torch29-cxx11-cu130-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
  44. build/torch29-cxx11-cu130-x86_64-linux/metadata.json +0 -1
  45. build/torch29-cxx11-cu130-x86_64-linux/tinygrad_rms/__init__.py +0 -26
build/torch210-cxx11-cu126-x86_64-linux/__init__.py DELETED
@@ -1,63 +0,0 @@
1
- from typing import Optional, Tuple
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- def tinygrad_rms_norm(
9
- x: torch.Tensor,
10
- epsilon: float = 1e-6,
11
- out: Optional[torch.Tensor] = None,
12
- ) -> Tuple[torch.Tensor, torch.Tensor]:
13
- """
14
- Compute RMSNorm using tinygrad-style CUDA kernels.
15
-
16
- RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
17
-
18
- This implementation uses a two-kernel approach:
19
- 1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
20
- 2. Multiply input by the computed factor
21
-
22
- Args:
23
- x: Input tensor of shape (..., hidden_size)
24
- epsilon: Small constant for numerical stability
25
- out: Optional pre-allocated output tensor
26
-
27
- Returns:
28
- Tuple of (output tensor, rms_inv tensor)
29
- """
30
- if out is None:
31
- out = torch.empty_like(x)
32
-
33
- hidden_size = x.size(-1)
34
- num_rows = x.numel() // hidden_size
35
- rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
36
-
37
- ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
38
- return out, rms_inv
39
-
40
-
41
- def tinygrad_rms_norm_simple(
42
- x: torch.Tensor,
43
- epsilon: float = 1e-6,
44
- out: Optional[torch.Tensor] = None,
45
- ) -> torch.Tensor:
46
- """
47
- Compute RMSNorm using tinygrad-style CUDA kernels.
48
-
49
- This is a simpler interface that only returns the normalized output.
50
-
51
- Args:
52
- x: Input tensor of shape (..., hidden_size)
53
- epsilon: Small constant for numerical stability
54
- out: Optional pre-allocated output tensor
55
-
56
- Returns:
57
- Normalized output tensor
58
- """
59
- if out is None:
60
- out = torch.empty_like(x)
61
-
62
- ops.tinygrad_rms_norm_inplace(out, x, epsilon)
63
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch210-cxx11-cu126-x86_64-linux/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _tinygrad_rms_45fdbd5
3
- ops = torch.ops._tinygrad_rms_45fdbd5
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_tinygrad_rms_45fdbd5::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch210-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4696f06074607161504dbc084412b8290460ab7cd9f653f34249c02ec3683728
3
- size 2123408
 
 
 
 
build/torch210-cxx11-cu126-x86_64-linux/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"python-depends":[]}
 
 
build/torch210-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
- def _import_from_path(file_path: Path) -> ModuleType:
9
- # We cannot use the module name as-is, after adding it to `sys.modules`,
10
- # it would also be used for other imports. So, we make a module name that
11
- # depends on the path for it to be unique using the hex-encoded hash of
12
- # the path.
13
- path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
- module_name = path_hash
15
- spec = importlib.util.spec_from_file_location(module_name, file_path)
16
- if spec is None:
17
- raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
- module = importlib.util.module_from_spec(spec)
19
- if module is None:
20
- raise ImportError(f"Cannot load module {module_name} from spec")
21
- sys.modules[module_name] = module
22
- spec.loader.exec_module(module) # type: ignore
23
- return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch210-cxx11-cu128-x86_64-linux/__init__.py DELETED
@@ -1,63 +0,0 @@
1
- from typing import Optional, Tuple
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- def tinygrad_rms_norm(
9
- x: torch.Tensor,
10
- epsilon: float = 1e-6,
11
- out: Optional[torch.Tensor] = None,
12
- ) -> Tuple[torch.Tensor, torch.Tensor]:
13
- """
14
- Compute RMSNorm using tinygrad-style CUDA kernels.
15
-
16
- RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
17
-
18
- This implementation uses a two-kernel approach:
19
- 1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
20
- 2. Multiply input by the computed factor
21
-
22
- Args:
23
- x: Input tensor of shape (..., hidden_size)
24
- epsilon: Small constant for numerical stability
25
- out: Optional pre-allocated output tensor
26
-
27
- Returns:
28
- Tuple of (output tensor, rms_inv tensor)
29
- """
30
- if out is None:
31
- out = torch.empty_like(x)
32
-
33
- hidden_size = x.size(-1)
34
- num_rows = x.numel() // hidden_size
35
- rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
36
-
37
- ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
38
- return out, rms_inv
39
-
40
-
41
- def tinygrad_rms_norm_simple(
42
- x: torch.Tensor,
43
- epsilon: float = 1e-6,
44
- out: Optional[torch.Tensor] = None,
45
- ) -> torch.Tensor:
46
- """
47
- Compute RMSNorm using tinygrad-style CUDA kernels.
48
-
49
- This is a simpler interface that only returns the normalized output.
50
-
51
- Args:
52
- x: Input tensor of shape (..., hidden_size)
53
- epsilon: Small constant for numerical stability
54
- out: Optional pre-allocated output tensor
55
-
56
- Returns:
57
- Normalized output tensor
58
- """
59
- if out is None:
60
- out = torch.empty_like(x)
61
-
62
- ops.tinygrad_rms_norm_inplace(out, x, epsilon)
63
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch210-cxx11-cu128-x86_64-linux/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _tinygrad_rms_45fdbd5
3
- ops = torch.ops._tinygrad_rms_45fdbd5
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_tinygrad_rms_45fdbd5::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch210-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a3efdf652f388edb9448c80ecdcc7424364444923b39381fdbb0e44f6d56c1d
3
- size 2244024
 
 
 
 
build/torch210-cxx11-cu128-x86_64-linux/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"python-depends":[]}
 
 
build/torch210-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
- def _import_from_path(file_path: Path) -> ModuleType:
9
- # We cannot use the module name as-is, after adding it to `sys.modules`,
10
- # it would also be used for other imports. So, we make a module name that
11
- # depends on the path for it to be unique using the hex-encoded hash of
12
- # the path.
13
- path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
- module_name = path_hash
15
- spec = importlib.util.spec_from_file_location(module_name, file_path)
16
- if spec is None:
17
- raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
- module = importlib.util.module_from_spec(spec)
19
- if module is None:
20
- raise ImportError(f"Cannot load module {module_name} from spec")
21
- sys.modules[module_name] = module
22
- spec.loader.exec_module(module) # type: ignore
23
- return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch210-cxx11-cu130-x86_64-linux/__init__.py DELETED
@@ -1,63 +0,0 @@
1
- from typing import Optional, Tuple
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- def tinygrad_rms_norm(
9
- x: torch.Tensor,
10
- epsilon: float = 1e-6,
11
- out: Optional[torch.Tensor] = None,
12
- ) -> Tuple[torch.Tensor, torch.Tensor]:
13
- """
14
- Compute RMSNorm using tinygrad-style CUDA kernels.
15
-
16
- RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
17
-
18
- This implementation uses a two-kernel approach:
19
- 1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
20
- 2. Multiply input by the computed factor
21
-
22
- Args:
23
- x: Input tensor of shape (..., hidden_size)
24
- epsilon: Small constant for numerical stability
25
- out: Optional pre-allocated output tensor
26
-
27
- Returns:
28
- Tuple of (output tensor, rms_inv tensor)
29
- """
30
- if out is None:
31
- out = torch.empty_like(x)
32
-
33
- hidden_size = x.size(-1)
34
- num_rows = x.numel() // hidden_size
35
- rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
36
-
37
- ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
38
- return out, rms_inv
39
-
40
-
41
- def tinygrad_rms_norm_simple(
42
- x: torch.Tensor,
43
- epsilon: float = 1e-6,
44
- out: Optional[torch.Tensor] = None,
45
- ) -> torch.Tensor:
46
- """
47
- Compute RMSNorm using tinygrad-style CUDA kernels.
48
-
49
- This is a simpler interface that only returns the normalized output.
50
-
51
- Args:
52
- x: Input tensor of shape (..., hidden_size)
53
- epsilon: Small constant for numerical stability
54
- out: Optional pre-allocated output tensor
55
-
56
- Returns:
57
- Normalized output tensor
58
- """
59
- if out is None:
60
- out = torch.empty_like(x)
61
-
62
- ops.tinygrad_rms_norm_inplace(out, x, epsilon)
63
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch210-cxx11-cu130-x86_64-linux/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _tinygrad_rms_45fdbd5
3
- ops = torch.ops._tinygrad_rms_45fdbd5
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_tinygrad_rms_45fdbd5::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch210-cxx11-cu130-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5b54cd80f22b8778fe97ef7f461969e52300fa054b6ee180bcd46a264a454b2
3
- size 2245832
 
 
 
 
build/torch210-cxx11-cu130-x86_64-linux/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"python-depends":[]}
 
 
build/torch210-cxx11-cu130-x86_64-linux/tinygrad_rms/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
- def _import_from_path(file_path: Path) -> ModuleType:
9
- # We cannot use the module name as-is, after adding it to `sys.modules`,
10
- # it would also be used for other imports. So, we make a module name that
11
- # depends on the path for it to be unique using the hex-encoded hash of
12
- # the path.
13
- path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
- module_name = path_hash
15
- spec = importlib.util.spec_from_file_location(module_name, file_path)
16
- if spec is None:
17
- raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
- module = importlib.util.module_from_spec(spec)
19
- if module is None:
20
- raise ImportError(f"Cannot load module {module_name} from spec")
21
- sys.modules[module_name] = module
22
- spec.loader.exec_module(module) # type: ignore
23
- return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu126-x86_64-linux/__init__.py DELETED
@@ -1,63 +0,0 @@
1
- from typing import Optional, Tuple
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- def tinygrad_rms_norm(
9
- x: torch.Tensor,
10
- epsilon: float = 1e-6,
11
- out: Optional[torch.Tensor] = None,
12
- ) -> Tuple[torch.Tensor, torch.Tensor]:
13
- """
14
- Compute RMSNorm using tinygrad-style CUDA kernels.
15
-
16
- RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
17
-
18
- This implementation uses a two-kernel approach:
19
- 1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
20
- 2. Multiply input by the computed factor
21
-
22
- Args:
23
- x: Input tensor of shape (..., hidden_size)
24
- epsilon: Small constant for numerical stability
25
- out: Optional pre-allocated output tensor
26
-
27
- Returns:
28
- Tuple of (output tensor, rms_inv tensor)
29
- """
30
- if out is None:
31
- out = torch.empty_like(x)
32
-
33
- hidden_size = x.size(-1)
34
- num_rows = x.numel() // hidden_size
35
- rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
36
-
37
- ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
38
- return out, rms_inv
39
-
40
-
41
- def tinygrad_rms_norm_simple(
42
- x: torch.Tensor,
43
- epsilon: float = 1e-6,
44
- out: Optional[torch.Tensor] = None,
45
- ) -> torch.Tensor:
46
- """
47
- Compute RMSNorm using tinygrad-style CUDA kernels.
48
-
49
- This is a simpler interface that only returns the normalized output.
50
-
51
- Args:
52
- x: Input tensor of shape (..., hidden_size)
53
- epsilon: Small constant for numerical stability
54
- out: Optional pre-allocated output tensor
55
-
56
- Returns:
57
- Normalized output tensor
58
- """
59
- if out is None:
60
- out = torch.empty_like(x)
61
-
62
- ops.tinygrad_rms_norm_inplace(out, x, epsilon)
63
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu126-x86_64-linux/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _tinygrad_rms_45fdbd5
3
- ops = torch.ops._tinygrad_rms_45fdbd5
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_tinygrad_rms_45fdbd5::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ab4b614ba96a5ac6516c533cfa22aba664838f7e8b338726061f4de8b7313ce
3
- size 2116936
 
 
 
 
build/torch28-cxx11-cu126-x86_64-linux/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"python-depends":[]}
 
 
build/torch28-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
- def _import_from_path(file_path: Path) -> ModuleType:
9
- # We cannot use the module name as-is, after adding it to `sys.modules`,
10
- # it would also be used for other imports. So, we make a module name that
11
- # depends on the path for it to be unique using the hex-encoded hash of
12
- # the path.
13
- path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
- module_name = path_hash
15
- spec = importlib.util.spec_from_file_location(module_name, file_path)
16
- if spec is None:
17
- raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
- module = importlib.util.module_from_spec(spec)
19
- if module is None:
20
- raise ImportError(f"Cannot load module {module_name} from spec")
21
- sys.modules[module_name] = module
22
- spec.loader.exec_module(module) # type: ignore
23
- return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu128-x86_64-linux/__init__.py DELETED
@@ -1,63 +0,0 @@
1
- from typing import Optional, Tuple
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- def tinygrad_rms_norm(
9
- x: torch.Tensor,
10
- epsilon: float = 1e-6,
11
- out: Optional[torch.Tensor] = None,
12
- ) -> Tuple[torch.Tensor, torch.Tensor]:
13
- """
14
- Compute RMSNorm using tinygrad-style CUDA kernels.
15
-
16
- RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
17
-
18
- This implementation uses a two-kernel approach:
19
- 1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
20
- 2. Multiply input by the computed factor
21
-
22
- Args:
23
- x: Input tensor of shape (..., hidden_size)
24
- epsilon: Small constant for numerical stability
25
- out: Optional pre-allocated output tensor
26
-
27
- Returns:
28
- Tuple of (output tensor, rms_inv tensor)
29
- """
30
- if out is None:
31
- out = torch.empty_like(x)
32
-
33
- hidden_size = x.size(-1)
34
- num_rows = x.numel() // hidden_size
35
- rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
36
-
37
- ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
38
- return out, rms_inv
39
-
40
-
41
- def tinygrad_rms_norm_simple(
42
- x: torch.Tensor,
43
- epsilon: float = 1e-6,
44
- out: Optional[torch.Tensor] = None,
45
- ) -> torch.Tensor:
46
- """
47
- Compute RMSNorm using tinygrad-style CUDA kernels.
48
-
49
- This is a simpler interface that only returns the normalized output.
50
-
51
- Args:
52
- x: Input tensor of shape (..., hidden_size)
53
- epsilon: Small constant for numerical stability
54
- out: Optional pre-allocated output tensor
55
-
56
- Returns:
57
- Normalized output tensor
58
- """
59
- if out is None:
60
- out = torch.empty_like(x)
61
-
62
- ops.tinygrad_rms_norm_inplace(out, x, epsilon)
63
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu128-x86_64-linux/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _tinygrad_rms_45fdbd5
3
- ops = torch.ops._tinygrad_rms_45fdbd5
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_tinygrad_rms_45fdbd5::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b46f6034490e99711922f86c6cc713669ee7d6d1b93921d7ae9200a50b41a32c
3
- size 2229096
 
 
 
 
build/torch28-cxx11-cu128-x86_64-linux/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"python-depends":[]}
 
 
build/torch28-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
- def _import_from_path(file_path: Path) -> ModuleType:
9
- # We cannot use the module name as-is, after adding it to `sys.modules`,
10
- # it would also be used for other imports. So, we make a module name that
11
- # depends on the path for it to be unique using the hex-encoded hash of
12
- # the path.
13
- path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
- module_name = path_hash
15
- spec = importlib.util.spec_from_file_location(module_name, file_path)
16
- if spec is None:
17
- raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
- module = importlib.util.module_from_spec(spec)
19
- if module is None:
20
- raise ImportError(f"Cannot load module {module_name} from spec")
21
- sys.modules[module_name] = module
22
- spec.loader.exec_module(module) # type: ignore
23
- return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu129-x86_64-linux/__init__.py DELETED
@@ -1,63 +0,0 @@
1
- from typing import Optional, Tuple
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- def tinygrad_rms_norm(
9
- x: torch.Tensor,
10
- epsilon: float = 1e-6,
11
- out: Optional[torch.Tensor] = None,
12
- ) -> Tuple[torch.Tensor, torch.Tensor]:
13
- """
14
- Compute RMSNorm using tinygrad-style CUDA kernels.
15
-
16
- RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
17
-
18
- This implementation uses a two-kernel approach:
19
- 1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
20
- 2. Multiply input by the computed factor
21
-
22
- Args:
23
- x: Input tensor of shape (..., hidden_size)
24
- epsilon: Small constant for numerical stability
25
- out: Optional pre-allocated output tensor
26
-
27
- Returns:
28
- Tuple of (output tensor, rms_inv tensor)
29
- """
30
- if out is None:
31
- out = torch.empty_like(x)
32
-
33
- hidden_size = x.size(-1)
34
- num_rows = x.numel() // hidden_size
35
- rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
36
-
37
- ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
38
- return out, rms_inv
39
-
40
-
41
- def tinygrad_rms_norm_simple(
42
- x: torch.Tensor,
43
- epsilon: float = 1e-6,
44
- out: Optional[torch.Tensor] = None,
45
- ) -> torch.Tensor:
46
- """
47
- Compute RMSNorm using tinygrad-style CUDA kernels.
48
-
49
- This is a simpler interface that only returns the normalized output.
50
-
51
- Args:
52
- x: Input tensor of shape (..., hidden_size)
53
- epsilon: Small constant for numerical stability
54
- out: Optional pre-allocated output tensor
55
-
56
- Returns:
57
- Normalized output tensor
58
- """
59
- if out is None:
60
- out = torch.empty_like(x)
61
-
62
- ops.tinygrad_rms_norm_inplace(out, x, epsilon)
63
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu129-x86_64-linux/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _tinygrad_rms_45fdbd5
3
- ops = torch.ops._tinygrad_rms_45fdbd5
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_tinygrad_rms_45fdbd5::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu129-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c16ba6a6a761358d86098effc0ec3cb2d45af3dc8752093fced42b0251283b01
3
- size 2262880
 
 
 
 
build/torch28-cxx11-cu129-x86_64-linux/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"python-depends":[]}
 
 
build/torch28-cxx11-cu129-x86_64-linux/tinygrad_rms/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
- def _import_from_path(file_path: Path) -> ModuleType:
9
- # We cannot use the module name as-is, after adding it to `sys.modules`,
10
- # it would also be used for other imports. So, we make a module name that
11
- # depends on the path for it to be unique using the hex-encoded hash of
12
- # the path.
13
- path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
- module_name = path_hash
15
- spec = importlib.util.spec_from_file_location(module_name, file_path)
16
- if spec is None:
17
- raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
- module = importlib.util.module_from_spec(spec)
19
- if module is None:
20
- raise ImportError(f"Cannot load module {module_name} from spec")
21
- sys.modules[module_name] = module
22
- spec.loader.exec_module(module) # type: ignore
23
- return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch29-cxx11-cu126-x86_64-linux/__init__.py DELETED
@@ -1,63 +0,0 @@
1
- from typing import Optional, Tuple
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- def tinygrad_rms_norm(
9
- x: torch.Tensor,
10
- epsilon: float = 1e-6,
11
- out: Optional[torch.Tensor] = None,
12
- ) -> Tuple[torch.Tensor, torch.Tensor]:
13
- """
14
- Compute RMSNorm using tinygrad-style CUDA kernels.
15
-
16
- RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
17
-
18
- This implementation uses a two-kernel approach:
19
- 1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
20
- 2. Multiply input by the computed factor
21
-
22
- Args:
23
- x: Input tensor of shape (..., hidden_size)
24
- epsilon: Small constant for numerical stability
25
- out: Optional pre-allocated output tensor
26
-
27
- Returns:
28
- Tuple of (output tensor, rms_inv tensor)
29
- """
30
- if out is None:
31
- out = torch.empty_like(x)
32
-
33
- hidden_size = x.size(-1)
34
- num_rows = x.numel() // hidden_size
35
- rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
36
-
37
- ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
38
- return out, rms_inv
39
-
40
-
41
- def tinygrad_rms_norm_simple(
42
- x: torch.Tensor,
43
- epsilon: float = 1e-6,
44
- out: Optional[torch.Tensor] = None,
45
- ) -> torch.Tensor:
46
- """
47
- Compute RMSNorm using tinygrad-style CUDA kernels.
48
-
49
- This is a simpler interface that only returns the normalized output.
50
-
51
- Args:
52
- x: Input tensor of shape (..., hidden_size)
53
- epsilon: Small constant for numerical stability
54
- out: Optional pre-allocated output tensor
55
-
56
- Returns:
57
- Normalized output tensor
58
- """
59
- if out is None:
60
- out = torch.empty_like(x)
61
-
62
- ops.tinygrad_rms_norm_inplace(out, x, epsilon)
63
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch29-cxx11-cu126-x86_64-linux/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _tinygrad_rms_45fdbd5
3
- ops = torch.ops._tinygrad_rms_45fdbd5
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_tinygrad_rms_45fdbd5::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch29-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:422c66e3e96aaa06ef29eb35377283a085cff0c020fb6547419b7ff9b8e46706
3
- size 2116912
 
 
 
 
build/torch29-cxx11-cu126-x86_64-linux/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"python-depends":[]}
 
 
build/torch29-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
- def _import_from_path(file_path: Path) -> ModuleType:
9
- # We cannot use the module name as-is, after adding it to `sys.modules`,
10
- # it would also be used for other imports. So, we make a module name that
11
- # depends on the path for it to be unique using the hex-encoded hash of
12
- # the path.
13
- path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
- module_name = path_hash
15
- spec = importlib.util.spec_from_file_location(module_name, file_path)
16
- if spec is None:
17
- raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
- module = importlib.util.module_from_spec(spec)
19
- if module is None:
20
- raise ImportError(f"Cannot load module {module_name} from spec")
21
- sys.modules[module_name] = module
22
- spec.loader.exec_module(module) # type: ignore
23
- return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch29-cxx11-cu128-x86_64-linux/__init__.py DELETED
@@ -1,63 +0,0 @@
1
- from typing import Optional, Tuple
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- def tinygrad_rms_norm(
9
- x: torch.Tensor,
10
- epsilon: float = 1e-6,
11
- out: Optional[torch.Tensor] = None,
12
- ) -> Tuple[torch.Tensor, torch.Tensor]:
13
- """
14
- Compute RMSNorm using tinygrad-style CUDA kernels.
15
-
16
- RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
17
-
18
- This implementation uses a two-kernel approach:
19
- 1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
20
- 2. Multiply input by the computed factor
21
-
22
- Args:
23
- x: Input tensor of shape (..., hidden_size)
24
- epsilon: Small constant for numerical stability
25
- out: Optional pre-allocated output tensor
26
-
27
- Returns:
28
- Tuple of (output tensor, rms_inv tensor)
29
- """
30
- if out is None:
31
- out = torch.empty_like(x)
32
-
33
- hidden_size = x.size(-1)
34
- num_rows = x.numel() // hidden_size
35
- rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
36
-
37
- ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
38
- return out, rms_inv
39
-
40
-
41
- def tinygrad_rms_norm_simple(
42
- x: torch.Tensor,
43
- epsilon: float = 1e-6,
44
- out: Optional[torch.Tensor] = None,
45
- ) -> torch.Tensor:
46
- """
47
- Compute RMSNorm using tinygrad-style CUDA kernels.
48
-
49
- This is a simpler interface that only returns the normalized output.
50
-
51
- Args:
52
- x: Input tensor of shape (..., hidden_size)
53
- epsilon: Small constant for numerical stability
54
- out: Optional pre-allocated output tensor
55
-
56
- Returns:
57
- Normalized output tensor
58
- """
59
- if out is None:
60
- out = torch.empty_like(x)
61
-
62
- ops.tinygrad_rms_norm_inplace(out, x, epsilon)
63
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch29-cxx11-cu128-x86_64-linux/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _tinygrad_rms_45fdbd5
3
- ops = torch.ops._tinygrad_rms_45fdbd5
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_tinygrad_rms_45fdbd5::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch29-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf4a7f4abb4581e9b854a40da441efea3b8fa5f7b3803decd2d3a69c1e302e42
3
- size 2233160
 
 
 
 
build/torch29-cxx11-cu128-x86_64-linux/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"python-depends":[]}
 
 
build/torch29-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
- def _import_from_path(file_path: Path) -> ModuleType:
9
- # We cannot use the module name as-is, after adding it to `sys.modules`,
10
- # it would also be used for other imports. So, we make a module name that
11
- # depends on the path for it to be unique using the hex-encoded hash of
12
- # the path.
13
- path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
- module_name = path_hash
15
- spec = importlib.util.spec_from_file_location(module_name, file_path)
16
- if spec is None:
17
- raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
- module = importlib.util.module_from_spec(spec)
19
- if module is None:
20
- raise ImportError(f"Cannot load module {module_name} from spec")
21
- sys.modules[module_name] = module
22
- spec.loader.exec_module(module) # type: ignore
23
- return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch29-cxx11-cu130-x86_64-linux/__init__.py DELETED
@@ -1,63 +0,0 @@
1
- from typing import Optional, Tuple
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- def tinygrad_rms_norm(
9
- x: torch.Tensor,
10
- epsilon: float = 1e-6,
11
- out: Optional[torch.Tensor] = None,
12
- ) -> Tuple[torch.Tensor, torch.Tensor]:
13
- """
14
- Compute RMSNorm using tinygrad-style CUDA kernels.
15
-
16
- RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
17
-
18
- This implementation uses a two-kernel approach:
19
- 1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
20
- 2. Multiply input by the computed factor
21
-
22
- Args:
23
- x: Input tensor of shape (..., hidden_size)
24
- epsilon: Small constant for numerical stability
25
- out: Optional pre-allocated output tensor
26
-
27
- Returns:
28
- Tuple of (output tensor, rms_inv tensor)
29
- """
30
- if out is None:
31
- out = torch.empty_like(x)
32
-
33
- hidden_size = x.size(-1)
34
- num_rows = x.numel() // hidden_size
35
- rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
36
-
37
- ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
38
- return out, rms_inv
39
-
40
-
41
- def tinygrad_rms_norm_simple(
42
- x: torch.Tensor,
43
- epsilon: float = 1e-6,
44
- out: Optional[torch.Tensor] = None,
45
- ) -> torch.Tensor:
46
- """
47
- Compute RMSNorm using tinygrad-style CUDA kernels.
48
-
49
- This is a simpler interface that only returns the normalized output.
50
-
51
- Args:
52
- x: Input tensor of shape (..., hidden_size)
53
- epsilon: Small constant for numerical stability
54
- out: Optional pre-allocated output tensor
55
-
56
- Returns:
57
- Normalized output tensor
58
- """
59
- if out is None:
60
- out = torch.empty_like(x)
61
-
62
- ops.tinygrad_rms_norm_inplace(out, x, epsilon)
63
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch29-cxx11-cu130-x86_64-linux/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _tinygrad_rms_45fdbd5
3
- ops = torch.ops._tinygrad_rms_45fdbd5
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_tinygrad_rms_45fdbd5::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch29-cxx11-cu130-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a518e5985b488c6d89a85d6402c634a22bdf26a98033e6e63c5a49cc42767bcf
3
- size 2234864
 
 
 
 
build/torch29-cxx11-cu130-x86_64-linux/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"python-depends":[]}
 
 
build/torch29-cxx11-cu130-x86_64-linux/tinygrad_rms/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
- def _import_from_path(file_path: Path) -> ModuleType:
9
- # We cannot use the module name as-is, after adding it to `sys.modules`,
10
- # it would also be used for other imports. So, we make a module name that
11
- # depends on the path for it to be unique using the hex-encoded hash of
12
- # the path.
13
- path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
- module_name = path_hash
15
- spec = importlib.util.spec_from_file_location(module_name, file_path)
16
- if spec is None:
17
- raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
- module = importlib.util.module_from_spec(spec)
19
- if module is None:
20
- raise ImportError(f"Cannot load module {module_name} from spec")
21
- sys.modules[module_name] = module
22
- spec.loader.exec_module(module) # type: ignore
23
- return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))