diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..c59f6506a65a54819230080499358ef470b07cbe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,48 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +*.so filter=lfs diff=lfs merge=lfs -text +build/torch29-cu130-x86_64-windows/rotary/_rotary_a793e44.pyd filter=lfs diff=lfs merge=lfs -text +build/torch210-cu128-x86_64-windows/rotary/_rotary_119c830.pyd filter=lfs diff=lfs merge=lfs 
-text +build/torch210-cu128-x86_64-windows/rotary/_rotary_cdcfefe.pyd filter=lfs diff=lfs merge=lfs -text +build/torch29-xpu20252-x86_64-windows/rotary/_rotary_cdcfefe.pyd filter=lfs diff=lfs merge=lfs -text +build/torch210-cu128-x86_64-windows/rotary/_rotary_dec30e1.pyd filter=lfs diff=lfs merge=lfs -text +build/torch29-xpu20252-x86_64-windows/rotary/_rotary_dec30e1.pyd filter=lfs diff=lfs merge=lfs -text +build/torch210-cu128-x86_64-windows/rotary/_rotary_66b961a.pyd filter=lfs diff=lfs merge=lfs -text +build/torch29-xpu20252-x86_64-windows/rotary/_rotary_66b961a.pyd filter=lfs diff=lfs merge=lfs -text +build/torch210-cu128-x86_64-windows/rotary/_rotary_9f63cc2.pyd filter=lfs diff=lfs merge=lfs -text +build/torch210-xpu20253-x86_64-windows/rotary/_rotary_9f63cc2.pyd filter=lfs diff=lfs merge=lfs -text +build/torch210-cu128-x86_64-windows/_rotary_cuda_07a01e5.pyd filter=lfs diff=lfs merge=lfs -text +build/torch210-xpu20253-x86_64-windows/_rotary_xpu_07a01e5.pyd filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..179b56bedf996512ad8327bfa4cc7af907981b79 --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +--- +license: bsd-3-clause +tags: + - kernels +--- + +![Status](https://hubwebhook.dholtz.com/shield?repo=kernels-community/rotary) + +## rotary + +rotary embedding kernel from [Flash Attention](https://github.com/Dao-AILab/flash-attention/tree/main/csrc/rotary). 
def apply_rotary_reference(
    x1: torch.Tensor, x2: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor, conj: bool
) -> tuple[torch.Tensor, torch.Tensor]:
    """Compute the rotary rotation of ``(x1, x2)`` with plain tensor arithmetic.

    This is the ground truth the kernel output is verified against.

    Args:
        x1: First operand of the rotation.
        x2: Second operand of the rotation.
        cos: Cosine factors, broadcast against ``x1``/``x2``.
        sin: Sine factors, broadcast against ``x1``/``x2``.
        conj: When true, the sign of every ``sin`` term is flipped.

    Returns:
        The rotated ``(out1, out2)`` pair as newly allocated tensors.
    """
    if not conj:
        out1 = x1 * cos - x2 * sin
        out2 = x1 * sin + x2 * cos
    else:
        out1 = x1 * cos + x2 * sin
        out2 = -x1 * sin + x2 * cos
    return out1, out2


class RotaryBenchmark(Benchmark):
    """Benchmark ``apply_rotary`` on a small ("base") and a "large" workload.

    ``setup``/``benchmark_base``/``verify_base`` form the small workload and the
    ``*_large`` methods the large one.

    NOTE(review): ``self.device`` and ``self.kernel`` are read but never
    assigned in this class, so they are presumably provided by ``Benchmark``;
    ``seed`` is presumably consumed by the framework as well -- confirm.
    """

    seed: int = 42

    def _allocate(
        self, batch_size: int, seqlen: int, num_heads: int, rotary_dim: int
    ) -> None:
        """Create the random float32 inputs and the output buffers of a workload."""
        # The draw order (x1, x2, cos, sin) is kept exactly as before so that a
        # seeded generator produces the same tensors.
        data_shape = (batch_size, seqlen, num_heads, rotary_dim)
        self.x1 = torch.randn(*data_shape, device=self.device, dtype=torch.float32)
        self.x2 = torch.randn(*data_shape, device=self.device, dtype=torch.float32)

        # Rotary position embeddings; the singleton axis broadcasts over heads.
        table_shape = (seqlen, 1, rotary_dim)
        self.cos = torch.randn(*table_shape, device=self.device, dtype=torch.float32)
        self.sin = torch.randn(*table_shape, device=self.device, dtype=torch.float32)

        # Output tensors (the kernel is run in place, so clone the inputs).
        self.out1 = self.x1.clone()
        self.out2 = self.x2.clone()

    def _verify(self) -> torch.Tensor:
        """Store the kernel result in ``self.out`` and return the reference."""
        ref_out1, ref_out2 = apply_rotary_reference(
            self.x1, self.x2, self.cos, self.sin, False
        )
        # Concatenate for comparison (benchmark compares self.out with returned tensor)
        self.out = torch.cat([self.out1, self.out2], dim=-1)
        return torch.cat([ref_out1, ref_out2], dim=-1)

    def setup(self):
        """Allocate the small workload."""
        self._allocate(batch_size=2, seqlen=128, num_heads=8, rotary_dim=32)

    def benchmark_base(self):
        """Run the kernel in place on the small workload."""
        # Kept inline (not delegated to a helper) so the timed region is
        # unchanged. Reset outputs to the input values before each run.
        self.out1.copy_(self.x1)
        self.out2.copy_(self.x2)
        self.kernel.apply_rotary(
            self.out1, self.out2, self.cos, self.sin, self.out1, self.out2, False
        )

    def verify_base(self) -> torch.Tensor:
        """Return the reference result for the small workload."""
        return self._verify()

    def setup_large(self):
        """Allocate the large workload."""
        self._allocate(batch_size=8, seqlen=512, num_heads=32, rotary_dim=64)

    def benchmark_large(self):
        """Run the kernel in place on the large workload."""
        # Kept inline for the same reason as in benchmark_base.
        self.out1.copy_(self.x1)
        self.out2.copy_(self.x2)
        self.kernel.apply_rotary(
            self.out1, self.out2, self.cos, self.sin, self.out1, self.out2, False
        )

    def verify_large(self) -> torch.Tensor:
        """Return the reference result for the large workload."""
        return self._verify()
apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch210-cu128-x86_64-windows/_ops.py b/build/torch210-cu128-x86_64-windows/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..8dfdfd9a8cba564049603f81d84e8115957ff81c --- /dev/null +++ b/build/torch210-cu128-x86_64-windows/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_07a01e5 +ops = torch.ops._rotary_cuda_07a01e5 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at *file_path* as a module under a path-derived name.

    The module is registered in ``sys.modules`` before it is executed and is
    removed again if execution raises.

    Args:
        file_path: Location of the Python source file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for *file_path*.
    """
    # A bare ``import importlib`` does not guarantee that the ``util``
    # submodule is loaded, so import it explicitly before using it.
    import importlib.util

    # We cannot use the module name as-is, after adding it to `sys.modules`,
    # it would also be used for other imports. So, we make a module name that
    # depends on the path for it to be unique using the hex-encoded hash of
    # the path.
    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
    module_name = path_hash
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    # A spec without a loader cannot be executed; report it as an import
    # failure instead of an AttributeError on ``None``.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module registered.
        sys.modules.pop(module_name, None)
        raise
    return module
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.

    The result has the form ``<namespace>::<op_name>``.
    """
    qualified_name = f"_rotary_cuda_2022aa6::{op_name}"
    return qualified_name
def apply_rotary(
    x1: torch.Tensor,
    x2: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    out1: torch.Tensor,
    out2: torch.Tensor,
    conj: bool,
) -> None:
    """Forward all arguments, unchanged and in order, to the compiled op.

    Nothing is returned. NOTE(review): the op presumably writes its results
    into ``out1``/``out2`` (callers in this package pass the inputs as the
    outputs to rotate in place) -- confirm against the kernel source.
    """
    compiled_op = ops.apply_rotary
    compiled_op(x1, x2, cos, sin, out1, out2, conj)
+ Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_ops.py b/build/torch210-cxx11-cu126-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch210-cxx11-cu126-x86_64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..eefc0825bd168ffb2beea9f8c061713f91c18fff --- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ac4fb2c7bbe3b277ed069761faabce67d1e1f8b3d5708f2d6f0b8b1ccfa873 +size 8200568 diff --git a/build/torch210-cxx11-cu126-x86_64-linux/metadata.json b/build/torch210-cxx11-cu126-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0dacb99125f1112a811819ca1ffdde15c8c0faff --- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/metadata.json @@ -0,0 +1,18 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0+PTX" + ] + } +} diff --git a/build/torch210-cxx11-cu126-x86_64-linux/rotary/__init__.py b/build/torch210-cxx11-cu126-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
def apply_rotary_transformers(
    q: torch.Tensor,
    k: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    unsqueeze_dim: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Rotary kernel implementation wrapper
    Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature

    ``q`` and ``k`` are left untouched: each is cloned, the clone is split at
    half of ``q``'s last dimension, and the two halves are rotated in place by
    ``apply_rotary``. The rotated clones are returned as ``(q, k)``.
    """
    # The split point is taken from q and reused for k.
    half = q.shape[-1] // 2

    # Add the broadcast axis, then keep only the leading `half` columns when
    # the tables are wider than one half.
    # NOTE(review): presumably the caller passes tables whose two halves are
    # duplicates, which is why truncation is safe -- confirm.
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)
    if cos.shape[-1] != half:
        cos = cos[..., :half]
        sin = sin[..., :half]

    rotated = []
    for source in (q, k):
        result = source.clone()
        # Views into the clone, so the in-place kernel call updates `result`.
        first, second = result[..., :half], result[..., half:]
        apply_rotary(first, second, cos, sin, first, second, False)
        rotated.append(result)

    q_rotated, k_rotated = rotated
    return q_rotated, k_rotated
0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu128-aarch64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-cu128-x86_64-linux/__init__.py b/build/torch210-cxx11-cu128-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ 
+ Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_ops.py b/build/torch210-cxx11-cu128-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch210-cxx11-cu128-x86_64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..e8d0268e1282c0d4dd17a8731e17b59da799e0e6 --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1238e4b57b2f30d5c5f67fc1d64a133de551f9b68b619271ac2a10f948d66b04 +size 11905904 diff --git a/build/torch210-cxx11-cu128-x86_64-linux/metadata.json b/build/torch210-cxx11-cu128-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a794c92436c3827ae79b48d55f7ea964afd50f52 --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch210-cxx11-cu128-x86_64-linux/rotary/__init__.py b/build/torch210-cxx11-cu128-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-cu130-aarch64-linux/__init__.py b/build/torch210-cxx11-cu130-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., 
:half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch210-cxx11-cu130-aarch64-linux/_ops.py b/build/torch210-cxx11-cu130-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch210-cxx11-cu130-aarch64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch210-cxx11-cu130-aarch64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..1a6fec3254f1e21490194c0bc6321977ff41af40 --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:686edb81b5ffdc43e88e35995b962aed5d23061c6aa27aff61af910b76cf03bf +size 10411432 diff --git a/build/torch210-cxx11-cu130-aarch64-linux/metadata.json b/build/torch210-cxx11-cu130-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eff725542128e103dfb5df382d74940efff77214 --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/metadata.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch210-cxx11-cu130-aarch64-linux/rotary/__init__.py b/build/torch210-cxx11-cu130-aarch64-linux/rotary/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-cu130-x86_64-linux/__init__.py b/build/torch210-cxx11-cu130-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ 
+ Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch210-cxx11-cu130-x86_64-linux/_ops.py b/build/torch210-cxx11-cu130-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch210-cxx11-cu130-x86_64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch210-cxx11-cu130-x86_64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..07873a50b7d4180acef02c38372e8a4217e72258 --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:069004af51893d2f112d58bc00197cf813c5271ef6f9105936b7966bbb44881f +size 10310752 diff --git a/build/torch210-cxx11-cu130-x86_64-linux/metadata.json b/build/torch210-cxx11-cu130-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eff725542128e103dfb5df382d74940efff77214 --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/metadata.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch210-cxx11-cu130-x86_64-linux/rotary/__init__.py b/build/torch210-cxx11-cu130-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/__init__.py b/build/torch210-cxx11-xpu20253-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch210-cxx11-xpu20253-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., 
:half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py b/build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0316a95137455eff318a2ed3f70d396c1980d290 --- /dev/null +++ b/build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_xpu_2022aa6 +ops = torch.ops._rotary_xpu_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_xpu_2022aa6::{op_name}" diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/_rotary_xpu_2022aa6.abi3.so b/build/torch210-cxx11-xpu20253-x86_64-linux/_rotary_xpu_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..bff78b8120c85f92bbcaeec042e0558a97a8c003 --- /dev/null +++ b/build/torch210-cxx11-xpu20253-x86_64-linux/_rotary_xpu_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ce5dd015655bbbccf535f2b7078b184d01831778effd3058fa24256be69111 +size 2301504 diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json b/build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8f032899cf61212add2325c22107252842bd1588 --- /dev/null +++ b/build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json @@ -0,0 +1,8 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "xpu" + } +} \ No newline at end of file diff --git a/build/torch210-cxx11-xpu20253-x86_64-linux/rotary/__init__.py b/build/torch210-cxx11-xpu20253-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ 
b/build/torch210-cxx11-xpu20253-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-xpu20253-x86_64-windows/__init__.py b/build/torch210-xpu20253-x86_64-windows/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..537713178faffc508bce05bd7d15d96ff6c3bd4c --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers 
apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch210-xpu20253-x86_64-windows/_ops.py b/build/torch210-xpu20253-x86_64-windows/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..2f89db5464ca634c7664e6b311ca56da25d34b7c --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_xpu_07a01e5 +ops = torch.ops._rotary_xpu_07a01e5 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_xpu_07a01e5::{op_name}" diff --git a/build/torch210-xpu20253-x86_64-windows/_rotary_xpu_07a01e5.pyd b/build/torch210-xpu20253-x86_64-windows/_rotary_xpu_07a01e5.pyd new file mode 100644 index 0000000000000000000000000000000000000000..27056447e8d9fe208c01d248d5082f9e887ebac7 --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/_rotary_xpu_07a01e5.pyd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d857f2afd55cccc36d439f348ff360bdc7274c0e65660e41a2f8775526dec1 +size 396288 diff --git a/build/torch210-xpu20253-x86_64-windows/metadata.json b/build/torch210-xpu20253-x86_64-windows/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..de6520c18deaab0372f91d85948970c48240031c --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/metadata.json @@ -0,0 +1,5 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch210-xpu20253-x86_64-windows/rotary/__init__.py b/build/torch210-xpu20253-x86_64-windows/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bc434ef44e63409acb52a8f3fff54a4adc46ed6a --- /dev/null +++ b/build/torch210-xpu20253-x86_64-windows/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu126-aarch64-linux/__init__.py b/build/torch211-cxx11-cu126-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., 
:half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch211-cxx11-cu126-aarch64-linux/_ops.py b/build/torch211-cxx11-cu126-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch211-cxx11-cu126-aarch64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch211-cxx11-cu126-aarch64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..87a4edbb7b4a50318b77d60394a9588955a35d67 --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9808fdc543b02bbf1614464032fbd1fcd9433e4fc8f8f38646c71d66821b98 +size 8279200 diff --git a/build/torch211-cxx11-cu126-aarch64-linux/metadata.json b/build/torch211-cxx11-cu126-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0dacb99125f1112a811819ca1ffdde15c8c0faff --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/metadata.json @@ -0,0 +1,18 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0+PTX" + ] + } +} diff --git a/build/torch211-cxx11-cu126-aarch64-linux/rotary/__init__.py b/build/torch211-cxx11-cu126-aarch64-linux/rotary/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu126-x86_64-linux/__init__.py b/build/torch211-cxx11-cu126-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ 
+ Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch211-cxx11-cu126-x86_64-linux/_ops.py b/build/torch211-cxx11-cu126-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch211-cxx11-cu126-x86_64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch211-cxx11-cu126-x86_64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..fc70cacbdeb5607aa83943b92b0fabdaea18395e --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb1cd25cceded1b6b4ef38b25c640e69f41110e5678388e70ea05edf4c7ce061 +size 8193600 diff --git a/build/torch211-cxx11-cu126-x86_64-linux/metadata.json b/build/torch211-cxx11-cu126-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0dacb99125f1112a811819ca1ffdde15c8c0faff --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/metadata.json @@ -0,0 +1,18 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0+PTX" + ] + } +} diff --git a/build/torch211-cxx11-cu126-x86_64-linux/rotary/__init__.py b/build/torch211-cxx11-cu126-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu128-aarch64-linux/__init__.py b/build/torch211-cxx11-cu128-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., 
:half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch211-cxx11-cu128-aarch64-linux/_ops.py b/build/torch211-cxx11-cu128-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch211-cxx11-cu128-aarch64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch211-cxx11-cu128-aarch64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..173fa9e3d2262b43d5776203b637b6334f76f00a --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f7a34ebfb0d84e1f301fac0293edacf6fea4321c7566d759d4e339c7d860fc +size 12015512 diff --git a/build/torch211-cxx11-cu128-aarch64-linux/metadata.json b/build/torch211-cxx11-cu128-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a794c92436c3827ae79b48d55f7ea964afd50f52 --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch211-cxx11-cu128-aarch64-linux/rotary/__init__.py b/build/torch211-cxx11-cu128-aarch64-linux/rotary/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu128-x86_64-linux/__init__.py b/build/torch211-cxx11-cu128-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ 
+ Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch211-cxx11-cu128-x86_64-linux/_ops.py b/build/torch211-cxx11-cu128-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch211-cxx11-cu128-x86_64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch211-cxx11-cu128-x86_64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..3e769bfdb01dfeb5ef6f853a0ee13978e944e6f2 --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8543effa188022e7fd780bf55a705873473ae908867c6fa1465efa72b611cc04 +size 11894840 diff --git a/build/torch211-cxx11-cu128-x86_64-linux/metadata.json b/build/torch211-cxx11-cu128-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a794c92436c3827ae79b48d55f7ea964afd50f52 --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch211-cxx11-cu128-x86_64-linux/rotary/__init__.py b/build/torch211-cxx11-cu128-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu130-aarch64-linux/__init__.py b/build/torch211-cxx11-cu130-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., 
:half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch211-cxx11-cu130-aarch64-linux/_ops.py b/build/torch211-cxx11-cu130-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch211-cxx11-cu130-aarch64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch211-cxx11-cu130-aarch64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..6b2f5fe6c71a8c0693fe74848470281b5a9ee458 --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008992ab92a2e0f4d5a63664706f69115de50db7d86c00f80cf944a85f979ae5 +size 10407744 diff --git a/build/torch211-cxx11-cu130-aarch64-linux/metadata.json b/build/torch211-cxx11-cu130-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eff725542128e103dfb5df382d74940efff77214 --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/metadata.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch211-cxx11-cu130-aarch64-linux/rotary/__init__.py b/build/torch211-cxx11-cu130-aarch64-linux/rotary/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu130-x86_64-linux/__init__.py b/build/torch211-cxx11-cu130-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ 
+ Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch211-cxx11-cu130-x86_64-linux/_ops.py b/build/torch211-cxx11-cu130-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch211-cxx11-cu130-x86_64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch211-cxx11-cu130-x86_64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..602bc069951380743948af6480f96b9f27168e59 --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:795a71adb51bd0405c372522f4f13d60addf89f36d4784f1206273d38261bafd +size 10303784 diff --git a/build/torch211-cxx11-cu130-x86_64-linux/metadata.json b/build/torch211-cxx11-cu130-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eff725542128e103dfb5df382d74940efff77214 --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/metadata.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch211-cxx11-cu130-x86_64-linux/rotary/__init__.py b/build/torch211-cxx11-cu130-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/__init__.py b/build/torch211-cxx11-xpu20253-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch211-cxx11-xpu20253-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., 
:half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/_ops.py b/build/torch211-cxx11-xpu20253-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0316a95137455eff318a2ed3f70d396c1980d290 --- /dev/null +++ b/build/torch211-cxx11-xpu20253-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_xpu_2022aa6 +ops = torch.ops._rotary_xpu_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_xpu_2022aa6::{op_name}" diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/_rotary_xpu_2022aa6.abi3.so b/build/torch211-cxx11-xpu20253-x86_64-linux/_rotary_xpu_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..e16d7f4e849125d39825d5c1a844b6e383f7bc8d --- /dev/null +++ b/build/torch211-cxx11-xpu20253-x86_64-linux/_rotary_xpu_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d3e980c4780de32a6a55f04bc9642e516f7858a6174d2cd3b973d23141c17ce +size 2301504 diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/metadata.json b/build/torch211-cxx11-xpu20253-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8f032899cf61212add2325c22107252842bd1588 --- /dev/null +++ b/build/torch211-cxx11-xpu20253-x86_64-linux/metadata.json @@ -0,0 +1,8 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "xpu" + } +} \ No newline at end of file diff --git a/build/torch211-cxx11-xpu20253-x86_64-linux/rotary/__init__.py b/build/torch211-cxx11-xpu20253-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ 
b/build/torch211-cxx11-xpu20253-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch26-cxx11-cu126-aarch64-linux/rotary/__init__.py b/build/torch26-cxx11-cu126-aarch64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch26-cxx11-cu126-aarch64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch26-cxx11-cu126-aarch64-linux/rotary/_ops.py b/build/torch26-cxx11-cu126-aarch64-linux/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..8d9717343c07cf81e45646b6fc80dddc95d58bdf --- /dev/null +++ 
b/build/torch26-cxx11-cu126-aarch64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_6b8e81d +ops = torch.ops._rotary_6b8e81d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_6b8e81d::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu126-aarch64-linux/rotary/_rotary_6b8e81d.abi3.so b/build/torch26-cxx11-cu126-aarch64-linux/rotary/_rotary_6b8e81d.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..7abd3dca0cbc48d258d764432d3f912e30beb788 --- /dev/null +++ b/build/torch26-cxx11-cu126-aarch64-linux/rotary/_rotary_6b8e81d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91e09bffe6812e5fbf856a01a164bc41c4eb3f49e2102c723c20d695025a34e9 +size 4543712 diff --git a/build/torch26-cxx98-cu126-aarch64-linux/rotary/__init__.py b/build/torch26-cxx98-cu126-aarch64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch26-cxx98-cu126-aarch64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch26-cxx98-cu126-aarch64-linux/rotary/_ops.py b/build/torch26-cxx98-cu126-aarch64-linux/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..8d9717343c07cf81e45646b6fc80dddc95d58bdf --- /dev/null +++ b/build/torch26-cxx98-cu126-aarch64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_6b8e81d +ops = torch.ops._rotary_6b8e81d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_6b8e81d::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu126-aarch64-linux/rotary/_rotary_6b8e81d.abi3.so b/build/torch26-cxx98-cu126-aarch64-linux/rotary/_rotary_6b8e81d.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..8fcb995b549848b4d7375e4d41bcd219f4857328 --- /dev/null +++ b/build/torch26-cxx98-cu126-aarch64-linux/rotary/_rotary_6b8e81d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b215a74951fe7e1c8be6a8fb7f54483e0e393958acc4c410b9fca7ce70470e39 +size 4540224 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..973f6b3c5f14b063a77f0feb30beb6749e74e985 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24dc100bca31802e6e17c9293e489129509bea6c Binary files /dev/null and 
b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d43408adb5450ff15c2e04cd3311709823d05e29 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_98ffc18 +ops = torch.ops._rotary_98ffc18 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_98ffc18::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/_rotary_98ffc18.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/rotary/_rotary_98ffc18.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..9eac909edbaf49935f07cc1b554e77df437c30e0 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/rotary/_rotary_98ffc18.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:030e76c6ee0921ae7ada04dfe14fceb8a4454e794ddf9ce68f29a32e7075c9be +size 6807656 diff --git a/build/torch27-cxx11-cu126-aarch64-linux/rotary/__init__.py b/build/torch27-cxx11-cu126-aarch64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch27-cxx11-cu126-aarch64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch27-cxx11-cu126-aarch64-linux/rotary/_ops.py b/build/torch27-cxx11-cu126-aarch64-linux/rotary/_ops.py new file mode 100644 index 
0000000000000000000000000000000000000000..8d9717343c07cf81e45646b6fc80dddc95d58bdf --- /dev/null +++ b/build/torch27-cxx11-cu126-aarch64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_6b8e81d +ops = torch.ops._rotary_6b8e81d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_6b8e81d::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-aarch64-linux/rotary/_rotary_6b8e81d.abi3.so b/build/torch27-cxx11-cu126-aarch64-linux/rotary/_rotary_6b8e81d.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..56513966cfae8464c984ce7af405618bee4dfabb --- /dev/null +++ b/build/torch27-cxx11-cu126-aarch64-linux/rotary/_rotary_6b8e81d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fab33bc6bf4d4294efc1140427f8ff608a4633d8f6dfc9416547e78fc2dba4 +size 6378944 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/rotary/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9b67802d61c31b708beb2b07a6b65187dbd5ae1 Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git 
a/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54042216baf24f270b9709ac7bc50bc654fd31f3 Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/rotary/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d43408adb5450ff15c2e04cd3311709823d05e29 --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_98ffc18 +ops = torch.ops._rotary_98ffc18 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_98ffc18::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-x86_64-linux/rotary/_rotary_98ffc18.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/rotary/_rotary_98ffc18.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..923de95a7bd6bbf59a860f4ab35fcc11cb371765 --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/rotary/_rotary_98ffc18.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a502a5e1b8282bfc625c617eb6b239a2c4277d9198ec0dd162589b61005c8c92 +size 6820496 diff --git a/build/torch27-cxx11-cu128-aarch64-linux/rotary/__init__.py b/build/torch27-cxx11-cu128-aarch64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch27-cxx11-cu128-aarch64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + 
conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch27-cxx11-cu128-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60d5bfab0618d302e241517f1c7ec4ce6f9fb156 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fed6ec5d42bed01465a824b1081d1950022698cc Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/rotary/_ops.py b/build/torch27-cxx11-cu128-aarch64-linux/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cec319f8e2b3f08afbe538960d08c34a6b08e --- /dev/null +++ b/build/torch27-cxx11-cu128-aarch64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_6abd2a8 +ops = torch.ops._rotary_6abd2a8 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_6abd2a8::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-aarch64-linux/rotary/_rotary_6abd2a8.abi3.so b/build/torch27-cxx11-cu128-aarch64-linux/rotary/_rotary_6abd2a8.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..d26d05f076e49a4fbc1848f4c61173854ffce7a7 --- /dev/null +++ b/build/torch27-cxx11-cu128-aarch64-linux/rotary/_rotary_6abd2a8.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29909df0009da77fb276a0ffd328200a201b2fd06ec78c457e6c63554f4d3e2d +size 10639192 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52c6bb36a1ac3067d18af5428e95c9c081f5112f Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..781e81e09df4dbace6fbfd55b41b182206c546b8 Binary files /dev/null and 
b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d43408adb5450ff15c2e04cd3311709823d05e29 --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_98ffc18 +ops = torch.ops._rotary_98ffc18 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_98ffc18::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/_rotary_98ffc18.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/rotary/_rotary_98ffc18.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..20b1a12d41e9ce96e265f91c2989cce7219e1f77 --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/rotary/_rotary_98ffc18.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8acfdd58aac193ab809386077c567e58b3da4481b5bb38af87bd4cdc18e6dd2b +size 10529816 diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__init__.py b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc 
new file mode 100644 index 0000000000000000000000000000000000000000..969f133617d659b76547c15b0dfa58dfc7e50a20 Binary files /dev/null and b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5678d770414a30c0cf0d5b03123de2438c00a066 Binary files /dev/null and b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_ops.py b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d43408adb5450ff15c2e04cd3311709823d05e29 --- /dev/null +++ b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_98ffc18 +ops = torch.ops._rotary_98ffc18 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_98ffc18::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_rotary_98ffc18.abi3.so b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_rotary_98ffc18.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..8173fd02037c1c71668f5f27551da5070912a1dd --- /dev/null +++ b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_rotary_98ffc18.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c6c59c7df2489ffd3154047967b69d5a39788d784e2aa543b64ff192c184792 +size 2337512 diff --git a/build/torch28-cxx11-cu126-aarch64-linux/rotary/__init__.py b/build/torch28-cxx11-cu126-aarch64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch28-cxx11-cu126-aarch64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch28-cxx11-cu126-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu126-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..73c1a2cac7d35226a8f650ce5aff7fe0020f027d Binary files /dev/null and b/build/torch28-cxx11-cu126-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu126-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..677c6b7a644f88c0bfbc029c3640cb441914ed17 Binary files /dev/null and 
b/build/torch28-cxx11-cu126-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-aarch64-linux/rotary/_ops.py b/build/torch28-cxx11-cu126-aarch64-linux/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..9506529a11d919c18f69068b69a3ed69a630bace --- /dev/null +++ b/build/torch28-cxx11-cu126-aarch64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_005dcc7_dirty +ops = torch.ops._rotary_005dcc7_dirty + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_005dcc7_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu126-aarch64-linux/rotary/_rotary_005dcc7_dirty.abi3.so b/build/torch28-cxx11-cu126-aarch64-linux/rotary/_rotary_005dcc7_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..cbd5ed429365ebbeba58af0cb62a45e99a5073e3 --- /dev/null +++ b/build/torch28-cxx11-cu126-aarch64-linux/rotary/_rotary_005dcc7_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a66e336456fb3e5ad528a8e6eef13f2d6ed8289936fb99445bd45a53172750 +size 6380008 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + 
Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch28-cxx11-cu126-x86_64-linux/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..2671c5d7b93b8ebd836715b40fea0fd4fbbffc1e --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_1d9fc74 +ops = torch.ops._rotary_1d9fc74 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_1d9fc74::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu126-x86_64-linux/_rotary_1d9fc74.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/_rotary_1d9fc74.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..0ce5b4d6b2a5244edf4133c94a15afa2268cb444 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/_rotary_1d9fc74.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d0a2fccecef02464217ce8fcd0029b858d20d38a0df9e4be56da8fe5196c95 +size 8190648 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/metadata.json b/build/torch28-cxx11-cu126-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch28-cxx11-cu126-x86_64-linux/rotary/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch28-cxx11-cu128-aarch64-linux/rotary/__init__.py b/build/torch28-cxx11-cu128-aarch64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch28-cxx11-cu128-aarch64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch28-cxx11-cu128-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu128-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6caf9406147987852b246c94b0370fbe237a919c Binary files /dev/null and b/build/torch28-cxx11-cu128-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu128-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e61932f2c6d305e84aa29f173031a65312e8e4b Binary files /dev/null and 
b/build/torch28-cxx11-cu128-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-aarch64-linux/rotary/_ops.py b/build/torch28-cxx11-cu128-aarch64-linux/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..9506529a11d919c18f69068b69a3ed69a630bace --- /dev/null +++ b/build/torch28-cxx11-cu128-aarch64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_005dcc7_dirty +ops = torch.ops._rotary_005dcc7_dirty + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_005dcc7_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-aarch64-linux/rotary/_rotary_005dcc7_dirty.abi3.so b/build/torch28-cxx11-cu128-aarch64-linux/rotary/_rotary_005dcc7_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..0b8b58a9c83ba9a0795bff49d90a348cd5576d02 --- /dev/null +++ b/build/torch28-cxx11-cu128-aarch64-linux/rotary/_rotary_005dcc7_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1789291b4f77fa26b1ed011d3dc768265cff6fe38bad6559a277c6eddabe7f4a +size 10247136 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ 
+ Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch28-cxx11-cu128-x86_64-linux/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..2671c5d7b93b8ebd836715b40fea0fd4fbbffc1e --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_1d9fc74 +ops = torch.ops._rotary_1d9fc74 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_1d9fc74::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-x86_64-linux/_rotary_1d9fc74.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/_rotary_1d9fc74.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..cd324605e809580f530a2a8a0cd5ee1844648f5c --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/_rotary_1d9fc74.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95350793e833b22a67c9569eb3936925cc128a13b4e2409ac2d23112519baf1c +size 11895768 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/metadata.json b/build/torch28-cxx11-cu128-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-x86_64-linux/rotary/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch28-cxx11-cu129-aarch64-linux/rotary/__init__.py b/build/torch28-cxx11-cu129-aarch64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch28-cxx11-cu129-aarch64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch28-cxx11-cu129-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu129-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3f633a77315c00e7ab7ec26434af98ba2813c6b Binary files /dev/null and b/build/torch28-cxx11-cu129-aarch64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu129-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7adf912877cb1f1d05201e5643c964bfb03ca111 Binary files /dev/null and 
b/build/torch28-cxx11-cu129-aarch64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-aarch64-linux/rotary/_ops.py b/build/torch28-cxx11-cu129-aarch64-linux/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cec319f8e2b3f08afbe538960d08c34a6b08e --- /dev/null +++ b/build/torch28-cxx11-cu129-aarch64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_6abd2a8 +ops = torch.ops._rotary_6abd2a8 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_6abd2a8::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-aarch64-linux/rotary/_rotary_6abd2a8.abi3.so b/build/torch28-cxx11-cu129-aarch64-linux/rotary/_rotary_6abd2a8.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..ed2f0d1965c5a08e3a11d2335d0d445a078a072f --- /dev/null +++ b/build/torch28-cxx11-cu129-aarch64-linux/rotary/_rotary_6abd2a8.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d97b6efbc29c35c6a13325e8e533a8479169a75f5ca915743616cd86344a962 +size 10705440 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation 
wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch28-cxx11-cu129-x86_64-linux/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..2671c5d7b93b8ebd836715b40fea0fd4fbbffc1e --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_1d9fc74 +ops = torch.ops._rotary_1d9fc74 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_1d9fc74::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-x86_64-linux/_rotary_1d9fc74.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/_rotary_1d9fc74.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..9f90c6de8ec076811de9f052543b1bd0ea6cde20 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/_rotary_1d9fc74.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15a5dee3e2e653cdb92486fb7c546c5e3a94fd5291351f1847e04f7dcaa6402c +size 11964056 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/metadata.json b/build/torch28-cxx11-cu129-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-x86_64-linux/rotary/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/__init__.py b/build/torch28-cxx11-xpu20251-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., 
:half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/_ops.py b/build/torch28-cxx11-xpu20251-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..2671c5d7b93b8ebd836715b40fea0fd4fbbffc1e --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_1d9fc74 +ops = torch.ops._rotary_1d9fc74 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_1d9fc74::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/_rotary_1d9fc74.abi3.so b/build/torch28-cxx11-xpu20251-x86_64-linux/_rotary_1d9fc74.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..3f6f5a1ae5aebd47b44b8aa60715f2ddfa981fce --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/_rotary_1d9fc74.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:926df52f77f1250de78cdcf777f4c135dfc1a200c3726a4d98fdd1b83f3cbb07 +size 2338640 diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/metadata.json b/build/torch28-cxx11-xpu20251-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__init__.py b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes 
+import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cu130-x86_64-windows/rotary/__init__.py b/build/torch29-cu130-x86_64-windows/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fe210ea3a2e6da63872100ee4bc1b749552b463d --- /dev/null +++ b/build/torch29-cu130-x86_64-windows/rotary/__init__.py @@ -0,0 +1,53 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + position_ids: Optional[torch.Tensor] = None, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernels implementation to match transformers apply_rotary_pos_emb signature + """ + cos = 
cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch29-cu130-x86_64-windows/rotary/_ops.py b/build/torch29-cu130-x86_64-windows/rotary/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..028a253cf4050404f40d8dadac35fc9c509aa98c --- /dev/null +++ b/build/torch29-cu130-x86_64-windows/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_a793e44 +ops = torch.ops._rotary_a793e44 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_a793e44::{op_name}" \ No newline at end of file diff --git a/build/torch29-cu130-x86_64-windows/rotary/_rotary_a793e44.pyd b/build/torch29-cu130-x86_64-windows/rotary/_rotary_a793e44.pyd new file mode 100644 index 0000000000000000000000000000000000000000..3174a0d680b4c6f57a4c8d8521bb088c08c1aeb0 --- /dev/null +++ b/build/torch29-cu130-x86_64-windows/rotary/_rotary_a793e44.pyd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606c6eb81894dc8197f73e0e71a5356f56c61c612e5f77ab5c3d7c351eab8d3a +size 8007680 diff --git a/build/torch29-cxx11-cu126-aarch64-linux/__init__.py b/build/torch29-cxx11-cu126-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch29-cxx11-cu126-aarch64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) 
+ apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch29-cxx11-cu126-aarch64-linux/_ops.py b/build/torch29-cxx11-cu126-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0040359339944bb061af9ca88170e28934477a4d --- /dev/null +++ b/build/torch29-cxx11-cu126-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_4e81b67 +ops = torch.ops._rotary_cuda_4e81b67 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_cuda_4e81b67::{op_name}" diff --git a/build/torch29-cxx11-cu126-aarch64-linux/_rotary_cuda_4e81b67.abi3.so b/build/torch29-cxx11-cu126-aarch64-linux/_rotary_cuda_4e81b67.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..53c031fd4a92dbde7d393b494c94f87468fcaf84 --- /dev/null +++ b/build/torch29-cxx11-cu126-aarch64-linux/_rotary_cuda_4e81b67.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dba12829865f9420696f7d05da5c56518fbb55d932e8acc1eb271be97ee1acf +size 8280552 diff --git a/build/torch29-cxx11-cu126-aarch64-linux/metadata.json b/build/torch29-cxx11-cu126-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0dacb99125f1112a811819ca1ffdde15c8c0faff --- /dev/null +++ b/build/torch29-cxx11-cu126-aarch64-linux/metadata.json @@ -0,0 +1,18 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0+PTX" + ] + } +} diff --git a/build/torch29-cxx11-cu126-aarch64-linux/rotary/__init__.py b/build/torch29-cxx11-cu126-aarch64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ 
b/build/torch29-cxx11-cu126-aarch64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu126-x86_64-linux/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers 
apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch29-cxx11-cu126-x86_64-linux/_ops.py b/build/torch29-cxx11-cu126-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0040359339944bb061af9ca88170e28934477a4d --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_4e81b67 +ops = torch.ops._rotary_cuda_4e81b67 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_cuda_4e81b67::{op_name}" diff --git a/build/torch29-cxx11-cu126-x86_64-linux/_rotary_cuda_4e81b67.abi3.so b/build/torch29-cxx11-cu126-x86_64-linux/_rotary_cuda_4e81b67.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..11be7d69f8e17bce8b5f4ae96d45f494e1a0dc8b --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/_rotary_cuda_4e81b67.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05d9271f2c7650370cafbb527311eafea0ae8a39cfeb8fe12873fbc0a142588a +size 8190552 diff --git a/build/torch29-cxx11-cu126-x86_64-linux/metadata.json b/build/torch29-cxx11-cu126-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0dacb99125f1112a811819ca1ffdde15c8c0faff --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/metadata.json @@ -0,0 +1,18 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0+PTX" + ] + } +} diff --git a/build/torch29-cxx11-cu126-x86_64-linux/rotary/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu128-aarch64-linux/__init__.py b/build/torch29-cxx11-cu128-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch29-cxx11-cu128-aarch64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + 
+ apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch29-cxx11-cu128-aarch64-linux/_ops.py b/build/torch29-cxx11-cu128-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0040359339944bb061af9ca88170e28934477a4d --- /dev/null +++ b/build/torch29-cxx11-cu128-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_4e81b67 +ops = torch.ops._rotary_cuda_4e81b67 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_cuda_4e81b67::{op_name}" diff --git a/build/torch29-cxx11-cu128-aarch64-linux/_rotary_cuda_4e81b67.abi3.so b/build/torch29-cxx11-cu128-aarch64-linux/_rotary_cuda_4e81b67.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..0b7e1c3230822c71e1281ec7a8bc33f9bf0d871a --- /dev/null +++ b/build/torch29-cxx11-cu128-aarch64-linux/_rotary_cuda_4e81b67.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24ba03d9bdd3fbe264d722c2e066493fc7ab72de5014bf3ee548fc0f86bb30d7 +size 12016752 diff --git a/build/torch29-cxx11-cu128-aarch64-linux/metadata.json b/build/torch29-cxx11-cu128-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a794c92436c3827ae79b48d55f7ea964afd50f52 --- /dev/null +++ b/build/torch29-cxx11-cu128-aarch64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch29-cxx11-cu128-aarch64-linux/rotary/__init__.py b/build/torch29-cxx11-cu128-aarch64-linux/rotary/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch29-cxx11-cu128-aarch64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu128-x86_64-linux/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary 
kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch29-cxx11-cu128-x86_64-linux/_ops.py b/build/torch29-cxx11-cu128-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0040359339944bb061af9ca88170e28934477a4d --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_4e81b67 +ops = torch.ops._rotary_cuda_4e81b67 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_cuda_4e81b67::{op_name}" diff --git a/build/torch29-cxx11-cu128-x86_64-linux/_rotary_cuda_4e81b67.abi3.so b/build/torch29-cxx11-cu128-x86_64-linux/_rotary_cuda_4e81b67.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..7fc5c476a2550473618d47acf930aad93311b087 --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/_rotary_cuda_4e81b67.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34033788a41442c5ce7fd43688c3efb3357f068b612b2617652037e60765e3a1 +size 11899984 diff --git a/build/torch29-cxx11-cu128-x86_64-linux/metadata.json b/build/torch29-cxx11-cu128-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a794c92436c3827ae79b48d55f7ea964afd50f52 --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch29-cxx11-cu128-x86_64-linux/rotary/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu129-aarch64-linux/__init__.py b/build/torch29-cxx11-cu129-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + 
+ apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch29-cxx11-cu129-aarch64-linux/_ops.py b/build/torch29-cxx11-cu129-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch29-cxx11-cu129-aarch64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch29-cxx11-cu129-aarch64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..79373c7c4f5fe10983232886b6677a3cd1981703 --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e9bd5da495de58a9054535da6724fe2ac98a81b9746c6a0455195fa5eea42a2 +size 12081848 diff --git a/build/torch29-cxx11-cu129-aarch64-linux/metadata.json b/build/torch29-cxx11-cu129-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a794c92436c3827ae79b48d55f7ea964afd50f52 --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch29-cxx11-cu129-aarch64-linux/rotary/__init__.py b/build/torch29-cxx11-cu129-aarch64-linux/rotary/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu129-x86_64-linux/__init__.py b/build/torch29-cxx11-cu129-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + 
Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch29-cxx11-cu129-x86_64-linux/_ops.py b/build/torch29-cxx11-cu129-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d09f2f0956a472d57a4bb833d515b40d124f276f --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_2022aa6 +ops = torch.ops._rotary_cuda_2022aa6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_cuda_2022aa6::{op_name}" diff --git a/build/torch29-cxx11-cu129-x86_64-linux/_rotary_cuda_2022aa6.abi3.so b/build/torch29-cxx11-cu129-x86_64-linux/_rotary_cuda_2022aa6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..bff12f8743608a39b19b3482c2e3b7c8890f3e76 --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/_rotary_cuda_2022aa6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e6433b2165e9f2c8c2dd84e828b231f51920ba35119d515f5d25606f19e661 +size 11964176 diff --git a/build/torch29-cxx11-cu129-x86_64-linux/metadata.json b/build/torch29-cxx11-cu129-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a794c92436c3827ae79b48d55f7ea964afd50f52 --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch29-cxx11-cu129-x86_64-linux/rotary/__init__.py b/build/torch29-cxx11-cu129-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu130-aarch64-linux/__init__.py b/build/torch29-cxx11-cu130-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch29-cxx11-cu130-aarch64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + 
+ apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch29-cxx11-cu130-aarch64-linux/_ops.py b/build/torch29-cxx11-cu130-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0040359339944bb061af9ca88170e28934477a4d --- /dev/null +++ b/build/torch29-cxx11-cu130-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_4e81b67 +ops = torch.ops._rotary_cuda_4e81b67 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_cuda_4e81b67::{op_name}" diff --git a/build/torch29-cxx11-cu130-aarch64-linux/_rotary_cuda_4e81b67.abi3.so b/build/torch29-cxx11-cu130-aarch64-linux/_rotary_cuda_4e81b67.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..0e905f7a4fb4532dcbd9d9855ce9acfa5754c9d7 --- /dev/null +++ b/build/torch29-cxx11-cu130-aarch64-linux/_rotary_cuda_4e81b67.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e09ee619e75dfcc66cabaebc1da15b08b2550449e1e598b16968a0a5b8dff0 +size 10408984 diff --git a/build/torch29-cxx11-cu130-aarch64-linux/metadata.json b/build/torch29-cxx11-cu130-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eff725542128e103dfb5df382d74940efff77214 --- /dev/null +++ b/build/torch29-cxx11-cu130-aarch64-linux/metadata.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch29-cxx11-cu130-aarch64-linux/rotary/__init__.py b/build/torch29-cxx11-cu130-aarch64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- 
/dev/null +++ b/build/torch29-cxx11-cu130-aarch64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu130-x86_64-linux/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch29-cxx11-cu130-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match 
transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., :half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch29-cxx11-cu130-x86_64-linux/_ops.py b/build/torch29-cxx11-cu130-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0040359339944bb061af9ca88170e28934477a4d --- /dev/null +++ b/build/torch29-cxx11-cu130-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cuda_4e81b67 +ops = torch.ops._rotary_cuda_4e81b67 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_rotary_cuda_4e81b67::{op_name}" diff --git a/build/torch29-cxx11-cu130-x86_64-linux/_rotary_cuda_4e81b67.abi3.so b/build/torch29-cxx11-cu130-x86_64-linux/_rotary_cuda_4e81b67.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..8a78791edab97f3f479b441c2cae413c84d7df36 --- /dev/null +++ b/build/torch29-cxx11-cu130-x86_64-linux/_rotary_cuda_4e81b67.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf0f41e92c54c3410e06b820e501007aa115ae95808f144de0c1281cfed4da7c +size 10304832 diff --git a/build/torch29-cxx11-cu130-x86_64-linux/metadata.json b/build/torch29-cxx11-cu130-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eff725542128e103dfb5df382d74940efff77214 --- /dev/null +++ b/build/torch29-cxx11-cu130-x86_64-linux/metadata.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch29-cxx11-cu130-x86_64-linux/rotary/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch29-cxx11-cu130-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/__init__.py b/build/torch29-cxx11-xpu20252-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a32e6a58cb685314795328dccadba33e87eaee6f --- /dev/null +++ b/build/torch29-cxx11-xpu20252-x86_64-linux/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +) -> None: + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +def apply_rotary_transformers( + q: torch.Tensor, + k: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + unsqueeze_dim: int = 1, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Rotary kernel implementation wrapper + Adapts rotary kernel implementation to match transformers apply_rotary_pos_emb signature + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + q_rotated = q.clone() + k_rotated = k.clone() + + # Get half dimension for rotation + half_dim = q.shape[-1] // 2 + q1 = q_rotated[..., :half_dim] + q2 = q_rotated[..., half_dim:] + k1 = k_rotated[..., :half_dim] + k2 = k_rotated[..., half_dim:] + if cos.shape[-1] != half_dim: + # Trim cos/sin to match half_dim + cos = cos[..., :half_dim] + sin = sin[..., 
:half_dim] + + apply_rotary(q1, q2, cos, sin, q1, q2, False) + apply_rotary(k1, k2, cos, sin, k1, k2, False) + return q_rotated, k_rotated + + +__all__ = ["apply_rotary", "apply_rotary_transformers"] diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/_ops.py b/build/torch29-cxx11-xpu20252-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..89c673c2015410bb249176345087b5e291299350 --- /dev/null +++ b/build/torch29-cxx11-xpu20252-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_xpu_17de4fe +ops = torch.ops._rotary_xpu_17de4fe + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_xpu_17de4fe::{op_name}" diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/_rotary_xpu_17de4fe.abi3.so b/build/torch29-cxx11-xpu20252-x86_64-linux/_rotary_xpu_17de4fe.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..f3d64122d3c290b60ca632899a4d6d739e06de21 --- /dev/null +++ b/build/torch29-cxx11-xpu20252-x86_64-linux/_rotary_xpu_17de4fe.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a60fb18c3e28fb30f341f8fc88a54ffac29be9db41da579b89be1b9ec3576acd +size 2287136 diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/metadata.json b/build/torch29-cxx11-xpu20252-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8f032899cf61212add2325c22107252842bd1588 --- /dev/null +++ b/build/torch29-cxx11-xpu20252-x86_64-linux/metadata.json @@ -0,0 +1,8 @@ +{ + "version": 1, + "license": "BSD-3-Clause", + "python-depends": [], + "backend": { + "type": "xpu" + } +} \ No newline at end of file diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/rotary/__init__.py b/build/torch29-cxx11-xpu20252-x86_64-linux/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ 
b/build/torch29-cxx11-xpu20252-x86_64-linux/rotary/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-xpu20252-x86_64-windows/metadata.json b/build/torch29-xpu20252-x86_64-windows/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch29-xpu20252-x86_64-windows/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch29-xpu20252-x86_64-windows/rotary/__init__.py b/build/torch29-xpu20252-x86_64-windows/rotary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..537713178faffc508bce05bd7d15d96ff6c3bd4c --- /dev/null +++ b/build/torch29-xpu20252-x86_64-windows/rotary/__init__.py @@ -0,0 +1,52 @@ +from typing import Optional, Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: 
def apply_rotary_transformers(
    q: torch.Tensor,
    k: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    unsqueeze_dim: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Wrapper around the rotary kernel.

    Exposes the rotary kernel behind a signature matching transformers'
    apply_rotary_pos_emb. ``cos`` and ``sin`` get a singleton axis inserted at
    ``unsqueeze_dim``; ``q`` and ``k`` are cloned, and the two halves of each
    clone's last axis are handed to ``apply_rotary`` as both inputs and
    outputs. The clones are returned as ``(q_rotated, k_rotated)``.
    """
    cos_b = cos.unsqueeze(unsqueeze_dim)
    sin_b = sin.unsqueeze(unsqueeze_dim)

    # Work on copies; presumably the kernel writes its result into the
    # out1/out2 views, which here alias the copies rather than q / k.
    q_rotated, k_rotated = q.clone(), k.clone()

    # Split point on the last axis, taken from q and reused for k.
    split = q.shape[-1] // 2
    halves = [(buf[..., :split], buf[..., split:]) for buf in (q_rotated, k_rotated)]

    # Keep only the leading `split` entries of cos/sin when their last axis
    # does not already have that length.
    if cos_b.shape[-1] != split:
        cos_b, sin_b = cos_b[..., :split], sin_b[..., :split]

    for first, second in halves:
        apply_rotary(first, second, cos_b, sin_b, first, second, False)
    return q_rotated, k_rotated
+ """ + return f"_rotary_66b961a::{op_name}" \ No newline at end of file diff --git a/build/torch29-xpu20252-x86_64-windows/rotary/_rotary_66b961a.pyd b/build/torch29-xpu20252-x86_64-windows/rotary/_rotary_66b961a.pyd new file mode 100644 index 0000000000000000000000000000000000000000..b838aa60a9c8d6e52c4146d5131c3da6854af356 --- /dev/null +++ b/build/torch29-xpu20252-x86_64-windows/rotary/_rotary_66b961a.pyd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4067ad1adb1b5a73202ffc9c78f8c827ff9c273506705670509ae81ffac68484 +size 388096