danieldk HF Staff committed on Jan 7

Commit

1ff8e83

verified ·

1 Parent(s): be90cf1

Build uploaded using `kernels`.

Browse files

Files changed (45) hide show

build/torch210-cxx11-cu126-x86_64-linux/__init__.py +0 -63
build/torch210-cxx11-cu126-x86_64-linux/_ops.py +0 -9
build/torch210-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
build/torch210-cxx11-cu126-x86_64-linux/metadata.json +0 -1
build/torch210-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py +0 -26
build/torch210-cxx11-cu128-x86_64-linux/__init__.py +0 -63
build/torch210-cxx11-cu128-x86_64-linux/_ops.py +0 -9
build/torch210-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
build/torch210-cxx11-cu128-x86_64-linux/metadata.json +0 -1
build/torch210-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py +0 -26
build/torch210-cxx11-cu130-x86_64-linux/__init__.py +0 -63
build/torch210-cxx11-cu130-x86_64-linux/_ops.py +0 -9
build/torch210-cxx11-cu130-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
build/torch210-cxx11-cu130-x86_64-linux/metadata.json +0 -1
build/torch210-cxx11-cu130-x86_64-linux/tinygrad_rms/__init__.py +0 -26
build/torch28-cxx11-cu126-x86_64-linux/__init__.py +0 -63
build/torch28-cxx11-cu126-x86_64-linux/_ops.py +0 -9
build/torch28-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
build/torch28-cxx11-cu126-x86_64-linux/metadata.json +0 -1
build/torch28-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py +0 -26
build/torch28-cxx11-cu128-x86_64-linux/__init__.py +0 -63
build/torch28-cxx11-cu128-x86_64-linux/_ops.py +0 -9
build/torch28-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
build/torch28-cxx11-cu128-x86_64-linux/metadata.json +0 -1
build/torch28-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py +0 -26
build/torch28-cxx11-cu129-x86_64-linux/__init__.py +0 -63
build/torch28-cxx11-cu129-x86_64-linux/_ops.py +0 -9
build/torch28-cxx11-cu129-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
build/torch28-cxx11-cu129-x86_64-linux/metadata.json +0 -1
build/torch28-cxx11-cu129-x86_64-linux/tinygrad_rms/__init__.py +0 -26
build/torch29-cxx11-cu126-x86_64-linux/__init__.py +0 -63
build/torch29-cxx11-cu126-x86_64-linux/_ops.py +0 -9
build/torch29-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
build/torch29-cxx11-cu126-x86_64-linux/metadata.json +0 -1
build/torch29-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py +0 -26
build/torch29-cxx11-cu128-x86_64-linux/__init__.py +0 -63
build/torch29-cxx11-cu128-x86_64-linux/_ops.py +0 -9
build/torch29-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
build/torch29-cxx11-cu128-x86_64-linux/metadata.json +0 -1
build/torch29-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py +0 -26
build/torch29-cxx11-cu130-x86_64-linux/__init__.py +0 -63
build/torch29-cxx11-cu130-x86_64-linux/_ops.py +0 -9
build/torch29-cxx11-cu130-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so +0 -3
build/torch29-cxx11-cu130-x86_64-linux/metadata.json +0 -1
build/torch29-cxx11-cu130-x86_64-linux/tinygrad_rms/__init__.py +0 -26

build/torch210-cxx11-cu126-x86_64-linux/__init__.py DELETED Viewed

@@ -1,63 +0,0 @@
-from typing import Optional, Tuple
-import torch
-from ._ops import ops
-def tinygrad_rms_norm(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
-    This implementation uses a two-kernel approach:
-    1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
-    2. Multiply input by the computed factor
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Tuple of (output tensor, rms_inv tensor)
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    hidden_size = x.size(-1)
-    num_rows = x.numel() // hidden_size
-    rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
-    ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
-    return out, rms_inv
-def tinygrad_rms_norm_simple(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> torch.Tensor:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    This is a simpler interface that only returns the normalized output.
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Normalized output tensor
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    ops.tinygrad_rms_norm_inplace(out, x, epsilon)
-    return out

build/torch210-cxx11-cu126-x86_64-linux/_ops.py DELETED Viewed

@@ -1,9 +0,0 @@
-import torch
-from . import _tinygrad_rms_45fdbd5
-ops = torch.ops._tinygrad_rms_45fdbd5
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_tinygrad_rms_45fdbd5::{op_name}"

build/torch210-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4696f06074607161504dbc084412b8290460ab7cd9f653f34249c02ec3683728
-size 2123408

build/torch210-cxx11-cu126-x86_64-linux/metadata.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"python-depends":[]}

build/torch210-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py DELETED Viewed

@@ -1,26 +0,0 @@
-import ctypes
-import sys
-import importlib
-from pathlib import Path
-from types import ModuleType
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch210-cxx11-cu128-x86_64-linux/__init__.py DELETED Viewed

@@ -1,63 +0,0 @@
-from typing import Optional, Tuple
-import torch
-from ._ops import ops
-def tinygrad_rms_norm(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
-    This implementation uses a two-kernel approach:
-    1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
-    2. Multiply input by the computed factor
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Tuple of (output tensor, rms_inv tensor)
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    hidden_size = x.size(-1)
-    num_rows = x.numel() // hidden_size
-    rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
-    ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
-    return out, rms_inv
-def tinygrad_rms_norm_simple(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> torch.Tensor:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    This is a simpler interface that only returns the normalized output.
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Normalized output tensor
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    ops.tinygrad_rms_norm_inplace(out, x, epsilon)
-    return out

build/torch210-cxx11-cu128-x86_64-linux/_ops.py DELETED Viewed

@@ -1,9 +0,0 @@
-import torch
-from . import _tinygrad_rms_45fdbd5
-ops = torch.ops._tinygrad_rms_45fdbd5
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_tinygrad_rms_45fdbd5::{op_name}"

build/torch210-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2a3efdf652f388edb9448c80ecdcc7424364444923b39381fdbb0e44f6d56c1d
-size 2244024

build/torch210-cxx11-cu128-x86_64-linux/metadata.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"python-depends":[]}

build/torch210-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py DELETED Viewed

@@ -1,26 +0,0 @@
-import ctypes
-import sys
-import importlib
-from pathlib import Path
-from types import ModuleType
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch210-cxx11-cu130-x86_64-linux/__init__.py DELETED Viewed

@@ -1,63 +0,0 @@
-from typing import Optional, Tuple
-import torch
-from ._ops import ops
-def tinygrad_rms_norm(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
-    This implementation uses a two-kernel approach:
-    1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
-    2. Multiply input by the computed factor
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Tuple of (output tensor, rms_inv tensor)
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    hidden_size = x.size(-1)
-    num_rows = x.numel() // hidden_size
-    rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
-    ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
-    return out, rms_inv
-def tinygrad_rms_norm_simple(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> torch.Tensor:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    This is a simpler interface that only returns the normalized output.
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Normalized output tensor
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    ops.tinygrad_rms_norm_inplace(out, x, epsilon)
-    return out

build/torch210-cxx11-cu130-x86_64-linux/_ops.py DELETED Viewed

@@ -1,9 +0,0 @@
-import torch
-from . import _tinygrad_rms_45fdbd5
-ops = torch.ops._tinygrad_rms_45fdbd5
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_tinygrad_rms_45fdbd5::{op_name}"

build/torch210-cxx11-cu130-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c5b54cd80f22b8778fe97ef7f461969e52300fa054b6ee180bcd46a264a454b2
-size 2245832

build/torch210-cxx11-cu130-x86_64-linux/metadata.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"python-depends":[]}

build/torch210-cxx11-cu130-x86_64-linux/tinygrad_rms/__init__.py DELETED Viewed

@@ -1,26 +0,0 @@
-import ctypes
-import sys
-import importlib
-from pathlib import Path
-from types import ModuleType
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch28-cxx11-cu126-x86_64-linux/__init__.py DELETED Viewed

@@ -1,63 +0,0 @@
-from typing import Optional, Tuple
-import torch
-from ._ops import ops
-def tinygrad_rms_norm(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
-    This implementation uses a two-kernel approach:
-    1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
-    2. Multiply input by the computed factor
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Tuple of (output tensor, rms_inv tensor)
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    hidden_size = x.size(-1)
-    num_rows = x.numel() // hidden_size
-    rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
-    ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
-    return out, rms_inv
-def tinygrad_rms_norm_simple(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> torch.Tensor:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    This is a simpler interface that only returns the normalized output.
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Normalized output tensor
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    ops.tinygrad_rms_norm_inplace(out, x, epsilon)
-    return out

build/torch28-cxx11-cu126-x86_64-linux/_ops.py DELETED Viewed

@@ -1,9 +0,0 @@
-import torch
-from . import _tinygrad_rms_45fdbd5
-ops = torch.ops._tinygrad_rms_45fdbd5
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_tinygrad_rms_45fdbd5::{op_name}"

build/torch28-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6ab4b614ba96a5ac6516c533cfa22aba664838f7e8b338726061f4de8b7313ce
-size 2116936

build/torch28-cxx11-cu126-x86_64-linux/metadata.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"python-depends":[]}

build/torch28-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py DELETED Viewed

@@ -1,26 +0,0 @@
-import ctypes
-import sys
-import importlib
-from pathlib import Path
-from types import ModuleType
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch28-cxx11-cu128-x86_64-linux/__init__.py DELETED Viewed

@@ -1,63 +0,0 @@
-from typing import Optional, Tuple
-import torch
-from ._ops import ops
-def tinygrad_rms_norm(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
-    This implementation uses a two-kernel approach:
-    1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
-    2. Multiply input by the computed factor
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Tuple of (output tensor, rms_inv tensor)
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    hidden_size = x.size(-1)
-    num_rows = x.numel() // hidden_size
-    rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
-    ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
-    return out, rms_inv
-def tinygrad_rms_norm_simple(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> torch.Tensor:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    This is a simpler interface that only returns the normalized output.
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Normalized output tensor
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    ops.tinygrad_rms_norm_inplace(out, x, epsilon)
-    return out

build/torch28-cxx11-cu128-x86_64-linux/_ops.py DELETED Viewed

@@ -1,9 +0,0 @@
-import torch
-from . import _tinygrad_rms_45fdbd5
-ops = torch.ops._tinygrad_rms_45fdbd5
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_tinygrad_rms_45fdbd5::{op_name}"

build/torch28-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b46f6034490e99711922f86c6cc713669ee7d6d1b93921d7ae9200a50b41a32c
-size 2229096

build/torch28-cxx11-cu128-x86_64-linux/metadata.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"python-depends":[]}

build/torch28-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py DELETED Viewed

@@ -1,26 +0,0 @@
-import ctypes
-import sys
-import importlib
-from pathlib import Path
-from types import ModuleType
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch28-cxx11-cu129-x86_64-linux/__init__.py DELETED Viewed

@@ -1,63 +0,0 @@
-from typing import Optional, Tuple
-import torch
-from ._ops import ops
-def tinygrad_rms_norm(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
-    This implementation uses a two-kernel approach:
-    1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
-    2. Multiply input by the computed factor
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Tuple of (output tensor, rms_inv tensor)
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    hidden_size = x.size(-1)
-    num_rows = x.numel() // hidden_size
-    rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
-    ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
-    return out, rms_inv
-def tinygrad_rms_norm_simple(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> torch.Tensor:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    This is a simpler interface that only returns the normalized output.
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Normalized output tensor
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    ops.tinygrad_rms_norm_inplace(out, x, epsilon)
-    return out

build/torch28-cxx11-cu129-x86_64-linux/_ops.py DELETED Viewed

@@ -1,9 +0,0 @@
-import torch
-from . import _tinygrad_rms_45fdbd5
-ops = torch.ops._tinygrad_rms_45fdbd5
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_tinygrad_rms_45fdbd5::{op_name}"

build/torch28-cxx11-cu129-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c16ba6a6a761358d86098effc0ec3cb2d45af3dc8752093fced42b0251283b01
-size 2262880

build/torch28-cxx11-cu129-x86_64-linux/metadata.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"python-depends":[]}

build/torch28-cxx11-cu129-x86_64-linux/tinygrad_rms/__init__.py DELETED Viewed

@@ -1,26 +0,0 @@
-import ctypes
-import sys
-import importlib
-from pathlib import Path
-from types import ModuleType
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch29-cxx11-cu126-x86_64-linux/__init__.py DELETED Viewed

@@ -1,63 +0,0 @@
-from typing import Optional, Tuple
-import torch
-from ._ops import ops
-def tinygrad_rms_norm(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
-    This implementation uses a two-kernel approach:
-    1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
-    2. Multiply input by the computed factor
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Tuple of (output tensor, rms_inv tensor)
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    hidden_size = x.size(-1)
-    num_rows = x.numel() // hidden_size
-    rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
-    ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
-    return out, rms_inv
-def tinygrad_rms_norm_simple(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> torch.Tensor:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    This is a simpler interface that only returns the normalized output.
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Normalized output tensor
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    ops.tinygrad_rms_norm_inplace(out, x, epsilon)
-    return out

build/torch29-cxx11-cu126-x86_64-linux/_ops.py DELETED Viewed

@@ -1,9 +0,0 @@
-import torch
-from . import _tinygrad_rms_45fdbd5
-ops = torch.ops._tinygrad_rms_45fdbd5
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_tinygrad_rms_45fdbd5::{op_name}"

build/torch29-cxx11-cu126-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:422c66e3e96aaa06ef29eb35377283a085cff0c020fb6547419b7ff9b8e46706
-size 2116912

build/torch29-cxx11-cu126-x86_64-linux/metadata.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"python-depends":[]}

build/torch29-cxx11-cu126-x86_64-linux/tinygrad_rms/__init__.py DELETED Viewed

@@ -1,26 +0,0 @@
-import ctypes
-import sys
-import importlib
-from pathlib import Path
-from types import ModuleType
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch29-cxx11-cu128-x86_64-linux/__init__.py DELETED Viewed

@@ -1,63 +0,0 @@
-from typing import Optional, Tuple
-import torch
-from ._ops import ops
-def tinygrad_rms_norm(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
-    This implementation uses a two-kernel approach:
-    1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
-    2. Multiply input by the computed factor
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Tuple of (output tensor, rms_inv tensor)
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    hidden_size = x.size(-1)
-    num_rows = x.numel() // hidden_size
-    rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
-    ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
-    return out, rms_inv
-def tinygrad_rms_norm_simple(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> torch.Tensor:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    This is a simpler interface that only returns the normalized output.
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Normalized output tensor
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    ops.tinygrad_rms_norm_inplace(out, x, epsilon)
-    return out

build/torch29-cxx11-cu128-x86_64-linux/_ops.py DELETED Viewed

@@ -1,9 +0,0 @@
-import torch
-from . import _tinygrad_rms_45fdbd5
-ops = torch.ops._tinygrad_rms_45fdbd5
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_tinygrad_rms_45fdbd5::{op_name}"

build/torch29-cxx11-cu128-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cf4a7f4abb4581e9b854a40da441efea3b8fa5f7b3803decd2d3a69c1e302e42
-size 2233160

build/torch29-cxx11-cu128-x86_64-linux/metadata.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"python-depends":[]}

build/torch29-cxx11-cu128-x86_64-linux/tinygrad_rms/__init__.py DELETED Viewed

@@ -1,26 +0,0 @@
-import ctypes
-import sys
-import importlib
-from pathlib import Path
-from types import ModuleType
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch29-cxx11-cu130-x86_64-linux/__init__.py DELETED Viewed

@@ -1,63 +0,0 @@
-from typing import Optional, Tuple
-import torch
-from ._ops import ops
-def tinygrad_rms_norm(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    RMSNorm(x) = x * (1 / sqrt(mean(x^2) + epsilon))
-    This implementation uses a two-kernel approach:
-    1. Compute 1/sqrt(mean(x^2) + epsilon) for each row
-    2. Multiply input by the computed factor
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Tuple of (output tensor, rms_inv tensor)
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    hidden_size = x.size(-1)
-    num_rows = x.numel() // hidden_size
-    rms_inv = torch.empty(num_rows, dtype=x.dtype, device=x.device)
-    ops.tinygrad_rms_norm(out, rms_inv, x, epsilon)
-    return out, rms_inv
-def tinygrad_rms_norm_simple(
-    x: torch.Tensor,
-    epsilon: float = 1e-6,
-    out: Optional[torch.Tensor] = None,
-) -> torch.Tensor:
-    """
-    Compute RMSNorm using tinygrad-style CUDA kernels.
-    This is a simpler interface that only returns the normalized output.
-    Args:
-        x: Input tensor of shape (..., hidden_size)
-        epsilon: Small constant for numerical stability
-        out: Optional pre-allocated output tensor
-    Returns:
-        Normalized output tensor
-    """
-    if out is None:
-        out = torch.empty_like(x)
-    ops.tinygrad_rms_norm_inplace(out, x, epsilon)
-    return out

build/torch29-cxx11-cu130-x86_64-linux/_ops.py DELETED Viewed

@@ -1,9 +0,0 @@
-import torch
-from . import _tinygrad_rms_45fdbd5
-ops = torch.ops._tinygrad_rms_45fdbd5
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_tinygrad_rms_45fdbd5::{op_name}"

build/torch29-cxx11-cu130-x86_64-linux/_tinygrad_rms_45fdbd5.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a518e5985b488c6d89a85d6402c634a22bdf26a98033e6e63c5a49cc42767bcf
-size 2234864

build/torch29-cxx11-cu130-x86_64-linux/metadata.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"python-depends":[]}

build/torch29-cxx11-cu130-x86_64-linux/tinygrad_rms/__init__.py DELETED Viewed

@@ -1,26 +0,0 @@
-import ctypes
-import sys
-import importlib
-from pathlib import Path
-from types import ModuleType
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))