Upload folder using huggingface_hub
build/torch-neuron/__init__.py
ADDED
@@ -0,0 +1,61 @@
+import torch
+
+from nkilib.core.mlp.mlp import mlp
+from nkilib.core.rmsnorm.rmsnorm_quant import rmsnorm_quant_kernel, RmsNormQuantKernelArgs
+from nkilib.core.utils.common_types import ActFnType, NormType, QuantizationType
+from ._ops import ops
+
+from . import layers
+
+
+def mlp_kernel(x, gate_proj_weight, up_proj_weight, down_proj_weight, activation_fn):
+    """Run the fused NKI MLP kernel, casting inputs to bfloat16 and the result back."""
+    x_dtype = x.dtype
+    dtype = torch.bfloat16
+
+    # Map the Hugging Face activation name onto the NKI ActFnType enum
+    # (SiLU, GELU, GELU_Tanh_Approx, Swish).
+    if activation_fn.lower() == "silu":
+        act_fn = ActFnType.SiLU
+    elif activation_fn.lower() == "gelu":
+        act_fn = ActFnType.GELU
+    elif activation_fn.lower() == "gelu_pytorch_tanh":
+        act_fn = ActFnType.GELU_Tanh_Approx
+    elif activation_fn.lower() == "swish":
+        act_fn = ActFnType.Swish
+    else:
+        raise ValueError(f"Activation function not supported: {activation_fn}")
+
+    return mlp(
+        x.to(dtype),
+        gate_proj_weight.transpose(1, 0).to(dtype),
+        up_proj_weight.transpose(1, 0).to(dtype),
+        down_proj_weight.transpose(1, 0).to(dtype),
+        activation_fn=act_fn,
+    ).to(x_dtype)
+
+
+def rmsnorm_kernel(hidden, ln_weight, epsilon):
+    """Run the NKI RMSNorm kernel, casting inputs to bfloat16 and the result back."""
+    hidden_dtype = hidden.dtype
+    dtype = torch.bfloat16
+
+    kernel_args = RmsNormQuantKernelArgs(
+        quantization_type=QuantizationType.ROW,
+        lower_bound=0.0,
+        norm_type=NormType.RMS_NORM,
+        eps=epsilon,
+    )
+
+    return rmsnorm_quant_kernel(
+        hidden=hidden.to(dtype),
+        ln_w=ln_weight.to(dtype),
+        kargs=kernel_args,
+    ).to(hidden_dtype)
+
+
+__all__ = [
+    "layers",
+    "MLP",
+    "RMSNorm",
+]
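
For reference, a minimal usage sketch of the two wrappers above. It assumes an AWS Neuron environment with `nkilib` available; the shapes, weight layout, and parameter values below are illustrative only.

import torch

hidden_size, intermediate_size = 64, 128

x = torch.randn(2, 8, hidden_size)
# Projection weights in the usual [out_features, in_features] layout; the wrapper transposes them.
gate_w = torch.randn(intermediate_size, hidden_size)
up_w = torch.randn(intermediate_size, hidden_size)
down_w = torch.randn(hidden_size, intermediate_size)

out = mlp_kernel(x, gate_w, up_w, down_w, activation_fn="silu")  # -> [2, 8, hidden_size]

ln_w = torch.ones(hidden_size)
normed = rmsnorm_kernel(x, ln_w, epsilon=1e-6)                   # -> [2, 8, hidden_size]
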
build/torch-neuron/_ops.py
ADDED
@@ -0,0 +1,10 @@
+import torch
+
+# Ops from this build are registered under a build-specific namespace
+# (note the hash suffix), accessed via torch.ops.
+ops = torch.ops._nki_kernels_5abad9b
+
+
+def add_op_namespace_prefix(op_name: str):
+    """Prefix an op name with this build's namespace."""
+    return f"_nki_kernels_5abad9b::{op_name}"
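
As a small illustration, the snippet below composes a qualified op name; `rmsnorm_quant` is a hypothetical op name used only to show the format.

qualified = add_op_namespace_prefix("rmsnorm_quant")  # hypothetical op name
# qualified == "_nki_kernels_5abad9b::rmsnorm_quant", the "namespace::name" form used by
# torch.library, which resolves to torch.ops._nki_kernels_5abad9b.rmsnorm_quant
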
build/torch-neuron/layers/__init__.py
ADDED
@@ -0,0 +1,38 @@
+import torch
+import torch.nn as nn
+import logging
+
+from .. import nki_kernels
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+class MLP(nn.Module):
+    # Defines only forward(); the annotated attributes must already be set on the
+    # module instance (e.g. by the layer this class stands in for).
+    config: object
+    gate_proj: torch.Tensor
+    up_proj: torch.Tensor
+    down_proj: torch.Tensor
+    act_fn: object
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return nki_kernels.mlp_kernel(
+            x,
+            self.gate_proj.weight,
+            self.up_proj.weight,
+            self.down_proj.weight,
+            self.config.hidden_act,
+        )
+
+
+class RMSNorm(nn.Module):
+    weight: torch.Tensor
+    variance_epsilon: float
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        return nki_kernels.rmsnorm_kernel(hidden_states, self.weight, self.variance_epsilon)
+
+    def extra_repr(self):
+        return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"
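
A minimal usage sketch for these layer classes, assuming a Neuron/NKI runtime. Because they define no `__init__`, the sketch assigns the expected attributes by hand; in practice they are meant to stand in for existing model layers whose attributes already exist. All names and shapes are illustrative.

import torch
import torch.nn as nn

class DummyConfig:
    hidden_act = "silu"

mlp = MLP()
mlp.config = DummyConfig()
mlp.gate_proj = nn.Linear(64, 128, bias=False)
mlp.up_proj = nn.Linear(64, 128, bias=False)
mlp.down_proj = nn.Linear(128, 64, bias=False)

norm = RMSNorm()
norm.weight = nn.Parameter(torch.ones(64))
norm.variance_epsilon = 1e-6

x = torch.randn(2, 8, 64)
y = mlp(norm(x))  # both forwards dispatch to the NKI kernels
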
build/torch-neuron/metadata-neuron.json
ADDED
@@ -0,0 +1,4 @@
+{
+  "version": 1,
+  "python-depends": []
+}

build/torch-neuron/nki_kernels/__init__.py
ADDED
@@ -0,0 +1,28 @@
+import ctypes
+import sys
+
+import importlib.util
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is: once it is added to `sys.modules`,
+    # it would also be picked up by other imports. So we derive a unique module
+    # name from the hex-encoded hash of the absolute path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+# Load the package-level __init__.py via its file path and re-export its names
+# (mlp_kernel, rmsnorm_kernel, ...) from this module.
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
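
For illustration, a hypothetical standalone use of `_import_from_path` (the paths are made up): because the module name is derived from the absolute path, two files with the same basename load as distinct modules.

mod_a = _import_from_path(Path("variant_a/utils.py"))  # illustrative paths
mod_b = _import_from_path(Path("variant_b/utils.py"))
assert mod_a is not mod_b  # separate sys.modules entries despite the shared basename
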