diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so deleted file mode 100755 index c1e52a91b4fa56b4ff39c854b33497b094135599..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9b6ba32ecc6fc898df3b0cebee85e9afc6881749fe58142280f051ca3332d913 -size 2546864 diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py deleted file mode 100644 index 0110324ade19f59f705c61d5c21912c958e92e96..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_be5bedb -ops = torch.ops._activation_be5bedb - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. - """ - return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py b/build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py deleted file mode 100644 index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,128 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - """An activation function for SwiGLU. - - The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. - - Shapes: - x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) - return: (num_tokens, d) or (batch_size, seq_len, d) - """ - - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class MulAndSilu(nn.Module): - """An activation function for SwiGLU. - - The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. - - Shapes: - x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) - return: (num_tokens, d) or (batch_size, seq_len, d) - """ - - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.mul_and_silu(out, x) - return out - - -class GeluAndMul(nn.Module): - """An activation function for GeGLU. - - The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. - - Shapes: - x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) - return: (batch_size, seq_len, d) or (num_tokens, d) - """ - - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - """An activation function for FATReLU. - - The function computes x -> FATReLU(x[:d]) * x[d:] where - d = x.shape[-1] // 2. 
- This is used in openbmb/MiniCPM-S-1B-sft. - - Shapes: - x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) - return: (num_tokens, d) or (batch_size, seq_len, d) - """ - - can_torch_compile: bool = True - - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so deleted file mode 100755 index f45a6ffcf3f11e3b24919496e213a61acb258d2a..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:331dcb3900d5e47a11d3577cdbac54f15a0b6e14910239293323c1d9e4eb9f49 -size 2616928 diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py b/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py deleted file mode 100644 index 0110324ade19f59f705c61d5c21912c958e92e96..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_be5bedb -ops = torch.ops._activation_be5bedb - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. - """ - return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so deleted file mode 100755 index 12f5777398872e7a3d93ab936e42ade8eeec3213..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1ce11492b9675a44afb3b896ed80e425f2a47e29481c4aad9c4a6ac59520f011 -size 2621472 diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py deleted file mode 100644 index 0110324ade19f59f705c61d5c21912c958e92e96..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_be5bedb -ops = torch.ops._activation_be5bedb - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. 
- """ - return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py b/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py deleted file mode 100644 index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py +++ /dev/null @@ -1,57 +0,0 @@ -import torch - -from ._ops import ops - -from . import layers - - -def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.silu_and_mul(out, x) - return out - - -def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: - ops.mul_and_silu(out, x) - return out - - -def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_and_mul(out, x) - return out - - -def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_tanh_and_mul(out, x) - return out - - -def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: - ops.fatrelu_and_mul(out, x, threshold) - return out - - -def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_fast(out, x) - return out - - -def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_new(out, x) - return out - - -def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_quick(out, x) - return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so deleted file mode 100755 index 056de26936949cc36baf3caa9c4212d730da81f7..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:434bd1ae43b7cbdb10d86b82da9a237ec05ef9d9fb4fc15cdc9096d3d5ed3fa7 -size 2539352 diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py b/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py deleted file mode 100644 index 0110324ade19f59f705c61d5c21912c958e92e96..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_be5bedb -ops = torch.ops._activation_be5bedb - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. - """ - return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py b/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py deleted file mode 100644 index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py +++ /dev/null @@ -1,57 +0,0 @@ -import torch - -from ._ops import ops - -from . 
import layers - - -def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.silu_and_mul(out, x) - return out - - -def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: - ops.mul_and_silu(out, x) - return out - - -def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_and_mul(out, x) - return out - - -def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_tanh_and_mul(out, x) - return out - - -def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: - ops.fatrelu_and_mul(out, x, threshold) - return out - - -def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_fast(out, x) - return out - - -def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_new(out, x) - return out - - -def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_quick(out, x) - return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so deleted file mode 100755 index c31190f8f2be87dbb5d5a9c497c68cea2258fded..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53ddfb42466bfe01feb98348f5c2d6beefd589aeb3dec4c5c36609e11a6bde4c -size 2605136 diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py b/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py deleted file mode 100644 index 0110324ade19f59f705c61d5c21912c958e92e96..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_be5bedb -ops = torch.ops._activation_be5bedb - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. - """ - return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py b/build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py deleted file mode 100644 index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,128 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - """An activation function for SwiGLU. - - The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. - - Shapes: - x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) - return: (num_tokens, d) or (batch_size, seq_len, d) - """ - - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class MulAndSilu(nn.Module): - """An activation function for SwiGLU. - - The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. 
- - Shapes: - x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) - return: (num_tokens, d) or (batch_size, seq_len, d) - """ - - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.mul_and_silu(out, x) - return out - - -class GeluAndMul(nn.Module): - """An activation function for GeGLU. - - The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. - - Shapes: - x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) - return: (batch_size, seq_len, d) or (num_tokens, d) - """ - - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - """An activation function for FATReLU. - - The function computes x -> FATReLU(x[:d]) * x[d:] where - d = x.shape[-1] // 2. - This is used in openbmb/MiniCPM-S-1B-sft. - - Shapes: - x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) - return: (num_tokens, d) or (batch_size, seq_len, d) - """ - - can_torch_compile: bool = True - - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py b/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py deleted file mode 100644 index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py +++ /dev/null @@ -1,57 +0,0 @@ -import torch - -from ._ops import ops - -from . 
import layers - - -def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.silu_and_mul(out, x) - return out - - -def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: - ops.mul_and_silu(out, x) - return out - - -def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_and_mul(out, x) - return out - - -def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_tanh_and_mul(out, x) - return out - - -def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: - ops.fatrelu_and_mul(out, x, threshold) - return out - - -def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_fast(out, x) - return out - - -def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_new(out, x) - return out - - -def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_quick(out, x) - return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so deleted file mode 100755 index 516f085e9ac787a2454fb78975dbaec25d2a6576..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ac7174352dea307231f308c84ca32ee001cdbcefd976de860e76501c52aae591 -size 2613776 diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py b/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py deleted file mode 100644 index 0110324ade19f59f705c61d5c21912c958e92e96..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_be5bedb -ops = torch.ops._activation_be5bedb - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. - """ - return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py b/build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py deleted file mode 100644 index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,128 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - """An activation function for SwiGLU. - - The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. - - Shapes: - x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) - return: (num_tokens, d) or (batch_size, seq_len, d) - """ - - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class MulAndSilu(nn.Module): - """An activation function for SwiGLU. - - The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. 
- - Shapes: - x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) - return: (num_tokens, d) or (batch_size, seq_len, d) - """ - - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.mul_and_silu(out, x) - return out - - -class GeluAndMul(nn.Module): - """An activation function for GeGLU. - - The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. - - Shapes: - x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) - return: (batch_size, seq_len, d) or (num_tokens, d) - """ - - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - """An activation function for FATReLU. - - The function computes x -> FATReLU(x[:d]) * x[d:] where - d = x.shape[-1] // 2. - This is used in openbmb/MiniCPM-S-1B-sft. - - Shapes: - x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) - return: (num_tokens, d) or (batch_size, seq_len, d) - """ - - can_torch_compile: bool = True - - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - can_torch_compile: bool = True - - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index 3a6358b82d007fa92ac419a82b73a371a184992c..bbf3ad846a76e365312ad965559a177976801396 100644 Binary files a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index aa07da5459427811e64acc67e85be6a1a5d8109d..47765ef8e985a500bbb3e25990387a1f1f15c767 100644 Binary files a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc 
b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 09398aaf4f3214cbf0c6b079dc7c7f6d2c12e109..de62862184381714910c79ecdf8db3ca14f8a753 100644 Binary files a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250917153858.abi3.so deleted file mode 100755 index 707666b73feb1d1a677d21840923c0146c316f66..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250917153858.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:618cdba5f19eabc1f9c1d33e130ef03ab1b11b52f1e7b00b73f2a10d5cf1e62f -size 2773664 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..c6c9665f880b574481be0f6464ac7637e732df84 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce06ec284ecd4ac5423d3822a60cd9eeb686d0054b38d66567de73e1137b0567 +size 2773632 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py index a24764a95a7a5490ca596cd418d5ce2c2591c906..4d722bffa37106dd2bfdb75db14408c7eecefcb0 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_20250917153858 -ops = torch.ops._activation_20250917153858 +from . import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_20250917153858::{op_name}" \ No newline at end of file + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index 0c4d3787b1aeba2c506fc491aaa28cbb5dbf9ac6..29e76b5c619af9b19c5650edcfd4f63c4725d35f 100644 Binary files a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 3aed458254d1ebba49b19df3d2984ea7ce30556f..f54053b63e8c2b7598967b6ca9739ecc85d6142a 100644 Binary files a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 4fe6da8188a01106d53124e5bcb3b53d1dc0e509..4d4a3c1172a3a2b4c954199c9762b3251d1c468c 100644 Binary files a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so deleted file mode 100755 index b1d622e9f768e1d07dc670ad89deb0de15a8a46a..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:87ee9280b670b3323378c17d75ee7082f419987a568769fe8479bf509ee6c245 -size 2852232 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..e9e9102689a8ddf42f881abedcd19e137f22d5e4 --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a529bd105aca5081398d63329e829b6b159570424cd654d3a9f275ca9a720e82 +size 2852200 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py index a24764a95a7a5490ca596cd418d5ce2c2591c906..4d722bffa37106dd2bfdb75db14408c7eecefcb0 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_20250917153858 -ops = torch.ops._activation_20250917153858 +from . import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_20250917153858::{op_name}" \ No newline at end of file + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index 09ba7d3df59ba0e6bb6f28483d8d9d066e736296..364976ff5017b183a827c0dfcda90becfbab0e7c 100644 Binary files a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 76b49d8e1d63e6bc3eab559ae97d3dd57281a675..008e1b91db1ae539587989af1a212f9cd38a1ae2 100644 Binary files a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 13146c78d42a18877fe1041ac8469d766158775e..d00f03a5b9a4944132d13ac0986acc2c54e0ca3c 100644 Binary files a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so deleted file mode 100755 index 9830157016a530b7cfeac9d15d361a7c2cffeffd..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:28ca9a3e35c49ae49694d7c6c77f85f3664622cad9c857bf13dfbf3bc144ae1b -size 4127912 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..6d8adc0f26f3b10cbc1b441b74bc7f49c0ebdaae --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2cffcb6b5b9a49f03a2df46fc2ad36765676edecb468c233e78e1f5e21e206 +size 4127872 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py index a24764a95a7a5490ca596cd418d5ce2c2591c906..4d722bffa37106dd2bfdb75db14408c7eecefcb0 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_20250917153858 -ops = torch.ops._activation_20250917153858 +from . import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_20250917153858::{op_name}" \ No newline at end of file + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index 9b1754cfdb6ad5edfe73ae99dcd829df47bbbe92..e8f8e706b1057711ae9e53bf255aa392d9356d5b 100644 Binary files a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index cb5b93c070c1bc3449aeddfd7bc67f3e73ce0671..ca11e4cda13d6d4f0a9f8a37d7188d53380ddde2 100644 Binary files a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index d3c18f3d02cc0af239075a590f1f1232c7bb61f8..e906e10360ab9b669e4add9e39cb9ce133ca04f6 100644 Binary files a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so deleted file mode 100755 index 2ffd19a1b43e107e6703a009dfa85619524754b9..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0fcd47dd765bba10bb09f65388f6c1b357b117b2611c17bae5bf8214499a9e39 -size 2837224 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..7c3397feac6fa683af5617d944ea5e6f5f42bf1b --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:972be0b2b7ce4f771028406367437488743dc81d70e6316e7a2694df1422b23d +size 2837192 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py index a24764a95a7a5490ca596cd418d5ce2c2591c906..4d722bffa37106dd2bfdb75db14408c7eecefcb0 100644 --- a/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_20250917153858 -ops = torch.ops._activation_20250917153858 +from . import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_20250917153858::{op_name}" \ No newline at end of file + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index a7fd63365a953f7804b2a89b5dda50cd506a0fdc..d12dd70b4a1174dc45b09641f8a67395f73f2052 100644 Binary files a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index fe47bb82e8371e3dba3018517aec31b669970d04..e5ad25122dbe45d007132c05ad491272043aff5a 100644 Binary files a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 232694fed7e1ea130e0cfcb18f219a62a996c206..55353ba18a89c372e3738c44597e1c129e955e3f 100644 Binary files a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so deleted file mode 100755 index a86c4c4db41ceacc50bb8a05ab438c747a8ef0ab..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0e6d88c71eebabc842f6a566de7cfaf24d3d90a30572eae584a3b51dcb7e838e -size 4117000 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..f12d8ce6414b9517c65869fe83bb570a87480d74 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec0756eb56dab9c57cc1aa01cfc2301d508fdf11ac4d02d015f7c16dd2246f2f +size 4116960 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py index a24764a95a7a5490ca596cd418d5ce2c2591c906..4d722bffa37106dd2bfdb75db14408c7eecefcb0 100644 --- a/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_20250917153858 -ops = torch.ops._activation_20250917153858 +from . import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_20250917153858::{op_name}" \ No newline at end of file + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index ed1db9c86882966d57ed36a0ed55bc4b2ca19321..cbbd7d5ff58d32b11600b3114e01c9f049ac553a 100644 Binary files a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 5241c54af2fe7946d1a0fd85a475d0d3ca40a4cf..6239d94f12316596571aa36b5f80073c4b3001c4 100644 Binary files a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index f6d111cf4f598453f07c754bf3bce7d50cafbff8..7c61641f68aa6668f378809762977aac8344e655 100644 Binary files a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so deleted file mode 100755 index 56bc6e0d6cb4f9b4e7260eab9be147746e14bd98..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f3c1b86db31b04bd5fe75b0c9d6915ba2766a2456ea9bd1a20f2d75c4b1acf35 -size 4154880 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..df6a901f09b0db5c03a0dea245c2500eb9a4b05a --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de346c02f046cbb177556580efc9994632adad1439bb90f451f2f690e326c39c +size 4154840 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py index a24764a95a7a5490ca596cd418d5ce2c2591c906..4d722bffa37106dd2bfdb75db14408c7eecefcb0 100644 --- a/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_20250917153858 -ops = torch.ops._activation_20250917153858 +from . import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_20250917153858::{op_name}" \ No newline at end of file + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py similarity index 76% rename from build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py rename to build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py +++ b/build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch29-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8701dcb62a9afdfff0bf2da0b13995a2f4052dc2 Binary files /dev/null and b/build/torch29-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch29-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4a13d5d1bf25ab58915502dc566b8de851bc021 Binary files /dev/null and b/build/torch29-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch29-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d7a5ecaadd06dac28e818f8290b371c1294f7a4 Binary files /dev/null and b/build/torch29-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch29-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..31e749efdff1ee341c214c67049d687123ed5a42 --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e915bb752b7105f3c2594ababa4480e8de7408257b07f5897f82012377e8c7 +size 2837168 diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch29-cxx11-cu126-x86_64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..4d722bffa37106dd2bfdb75db14408c7eecefcb0 --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -0,0 +1,9 @@ 
+import torch +from . import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py b/build/torch29-cxx11-cu126-x86_64-linux/activation/layers.py similarity index 73% rename from build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py rename to build/torch29-cxx11-cu126-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py +++ b/build/torch29-cxx11-cu126-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/activation/__init__.py similarity index 76% rename from build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py rename to build/torch29-cxx11-cu128-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py +++ b/build/torch29-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch29-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b934f588f1084b4e695f05dd5b505bb9f3b6977a Binary files /dev/null and b/build/torch29-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch29-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65804eed7cc7204dc308abe7c10470bb29e91534 Binary files /dev/null and b/build/torch29-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch29-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d232f4f0d36e80341e80d079349f68ddc9f5a3cc Binary files /dev/null and b/build/torch29-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch29-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..386275e1936b21f67c78effb606db9a1d69f729a --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74494aaff73017fd8103b598f6fc8c92085e3dc0be63bda413f658bb7bbfc9b0 +size 4116936 diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch29-cxx11-cu128-x86_64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..4d722bffa37106dd2bfdb75db14408c7eecefcb0 --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . 
import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch29-cxx11-cu128-x86_64-linux/activation/layers.py similarity index 73% rename from build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py rename to build/torch29-cxx11-cu128-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py +++ b/build/torch29-cxx11-cu128-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/activation/__init__.py similarity index 76% rename from build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py rename to build/torch29-cxx11-cu130-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py +++ b/build/torch29-cxx11-cu130-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch29-cxx11-cu130-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eac32d49e24d1a8671ffcddff8119d7a14e35f3f Binary files /dev/null and b/build/torch29-cxx11-cu130-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch29-cxx11-cu130-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f4111d9c64240435bd7d59958c320ea24e2f710 Binary files /dev/null and b/build/torch29-cxx11-cu130-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch29-cxx11-cu130-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40eec88689ee66667ecd946bb43a0cd137b80d38 Binary files /dev/null and b/build/torch29-cxx11-cu130-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch29-cxx11-cu130-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..38e458a1206168d344db213c3c06e3cd873a6834 --- /dev/null +++ b/build/torch29-cxx11-cu130-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f5500ae615f8a0abf063368bf22c4d031a2e4a8893817bd3bcaffc321d1622d +size 4019704 diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/_ops.py b/build/torch29-cxx11-cu130-x86_64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..4d722bffa37106dd2bfdb75db14408c7eecefcb0 --- /dev/null +++ b/build/torch29-cxx11-cu130-x86_64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . 
import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py b/build/torch29-cxx11-cu130-x86_64-linux/activation/layers.py similarity index 73% rename from build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py rename to build/torch29-cxx11-cu130-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py +++ b/build/torch29-cxx11-cu130-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU.
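
Taken together, this change drops the stale torch26 build variants, renames the extension for the torch27/torch28 builds from the timestamped _activation_20250917153858 to the content-addressed _activation_beeaae6, and adds new torch29 builds (cu126, cu128, cu130) that expose element-wise silu, gelu, and gelu_tanh ops alongside the existing fused kernels. The sketches below illustrate the resulting Python API. They assume the built `activation` package is importable and, for the CUDA kernels, that a GPU is available; they are usage sketches written against the code shown in this diff, not part of the diff itself.

The new element-wise layers (Silu, Gelu, GeluTanh) mirror their plain-PyTorch counterparts, which makes them easy to sanity-check:

    import torch
    import torch.nn.functional as F

    from activation import layers

    x = torch.randn(8, 1024, device="cuda", dtype=torch.float16)

    # Each layer allocates an output tensor and calls the fused CUDA op.
    silu_out = layers.Silu()(x)
    gelu_out = layers.Gelu()(x)
    gelu_tanh_out = layers.GeluTanh()(x)

    # Compare against the PyTorch reference implementations.
    torch.testing.assert_close(silu_out, F.silu(x))
    torch.testing.assert_close(gelu_out, F.gelu(x))
    torch.testing.assert_close(gelu_tanh_out, F.gelu(x, approximate="tanh"))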
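
The fused *AndMul kernels implement the gated activations used by SwiGLU/GeGLU MLPs: the input's last dimension is split in half, one half is passed through the activation, and the two halves are multiplied. A plain-PyTorch equivalent of the documented semantics (a reference sketch, not the kernel itself):

    import torch
    import torch.nn.functional as F

    def silu_and_mul_ref(x: torch.Tensor) -> torch.Tensor:
        # SiluAndMul: silu(x[..., :d]) * x[..., d:] with d = last dim // 2.
        d = x.shape[-1] // 2
        return F.silu(x[..., :d]) * x[..., d:]

    def mul_and_silu_ref(x: torch.Tensor) -> torch.Tensor:
        # MulAndSilu swaps which half is gated: x[..., :d] * silu(x[..., d:]).
        d = x.shape[-1] // 2
        return x[..., :d] * F.silu(x[..., d:])

    x = torch.randn(4, 16, 2 * 128)
    assert silu_and_mul_ref(x).shape == (4, 16, 128)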
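
FatreluAndMul follows the same split-and-gate pattern but uses FATReLU, a ReLU whose activation threshold can be positive; values at or below the threshold are zeroed. A sketch of the same math in plain PyTorch (my reading of the docstring, with the threshold semantics assumed from the usual FATReLU definition):

    import torch

    def fatrelu_and_mul_ref(x: torch.Tensor, threshold: float = 0.0) -> torch.Tensor:
        d = x.shape[-1] // 2
        gate = x[..., :d]
        # FATReLU: pass values strictly above the threshold, zero the rest.
        gate = torch.where(gate > threshold, gate, torch.zeros_like(gate))
        return gate * x[..., d:]

    # With threshold=0.0 this reduces to an ordinary ReLU gate.
    x = torch.randn(2, 2 * 64)
    assert fatrelu_and_mul_ref(x, threshold=0.05).shape == (2, 64)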
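
Finally, the _ops.py shim is why the extension rename stays a one-file change per build variant: it binds `ops` to the build-specific torch.ops namespace once, and everything else (layers.py, __init__.py) goes through that alias. A sketch of calling the raw ops directly, assuming the torch29 package layout from this diff:

    import torch
    from activation._ops import ops, add_op_namespace_prefix

    x = torch.randn(8, 1024, device="cuda")
    out = torch.empty_like(x)

    # Dispatches to the registered op _activation_beeaae6::gelu_quick.
    ops.gelu_quick(out, x)

    # The helper yields the fully qualified op name, e.g. for registering
    # meta/fake implementations against the same op.
    print(add_op_namespace_prefix("gelu_quick"))  # _activation_beeaae6::gelu_quick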