Kernels:

kernels-community
/

rotary

Trusted publisher

Kernel card Files Files and versions

xet

Community

drbh committed on Apr 3

Commit

663e348

unverified ·

0 Parent(s):

Migrated from kernels-community/rotary

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +48 -0
README.md +14 -0
benchmarks/benchmark.py +119 -0
build/torch210-cu128-x86_64-windows/__init__.py +52 -0
build/torch210-cu128-x86_64-windows/_ops.py +9 -0
build/torch210-cu128-x86_64-windows/_rotary_cuda_07a01e5.pyd +3 -0
build/torch210-cu128-x86_64-windows/metadata.json +21 -0
build/torch210-cu128-x86_64-windows/rotary/__init__.py +26 -0
build/torch210-cxx11-cu126-aarch64-linux/__init__.py +52 -0
build/torch210-cxx11-cu126-aarch64-linux/_ops.py +9 -0
build/torch210-cxx11-cu126-aarch64-linux/_rotary_cuda_2022aa6.abi3.so +3 -0
build/torch210-cxx11-cu126-aarch64-linux/metadata.json +18 -0
build/torch210-cxx11-cu126-aarch64-linux/rotary/__init__.py +26 -0
build/torch210-cxx11-cu126-x86_64-linux/__init__.py +52 -0
build/torch210-cxx11-cu126-x86_64-linux/_ops.py +9 -0
build/torch210-cxx11-cu126-x86_64-linux/_rotary_cuda_2022aa6.abi3.so +3 -0
build/torch210-cxx11-cu126-x86_64-linux/metadata.json +18 -0
build/torch210-cxx11-cu126-x86_64-linux/rotary/__init__.py +26 -0
build/torch210-cxx11-cu128-aarch64-linux/__init__.py +52 -0
build/torch210-cxx11-cu128-aarch64-linux/_ops.py +9 -0
build/torch210-cxx11-cu128-aarch64-linux/_rotary_cuda_2022aa6.abi3.so +3 -0
build/torch210-cxx11-cu128-aarch64-linux/metadata.json +21 -0
build/torch210-cxx11-cu128-aarch64-linux/rotary/__init__.py +26 -0
build/torch210-cxx11-cu128-x86_64-linux/__init__.py +52 -0
build/torch210-cxx11-cu128-x86_64-linux/_ops.py +9 -0
build/torch210-cxx11-cu128-x86_64-linux/_rotary_cuda_2022aa6.abi3.so +3 -0
build/torch210-cxx11-cu128-x86_64-linux/metadata.json +21 -0
build/torch210-cxx11-cu128-x86_64-linux/rotary/__init__.py +26 -0
build/torch210-cxx11-cu130-aarch64-linux/__init__.py +52 -0
build/torch210-cxx11-cu130-aarch64-linux/_ops.py +9 -0
build/torch210-cxx11-cu130-aarch64-linux/_rotary_cuda_2022aa6.abi3.so +3 -0
build/torch210-cxx11-cu130-aarch64-linux/metadata.json +19 -0
build/torch210-cxx11-cu130-aarch64-linux/rotary/__init__.py +26 -0
build/torch210-cxx11-cu130-x86_64-linux/__init__.py +52 -0
build/torch210-cxx11-cu130-x86_64-linux/_ops.py +9 -0
build/torch210-cxx11-cu130-x86_64-linux/_rotary_cuda_2022aa6.abi3.so +3 -0
build/torch210-cxx11-cu130-x86_64-linux/metadata.json +19 -0
build/torch210-cxx11-cu130-x86_64-linux/rotary/__init__.py +26 -0
build/torch210-cxx11-xpu20253-x86_64-linux/__init__.py +52 -0
build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py +9 -0
build/torch210-cxx11-xpu20253-x86_64-linux/_rotary_xpu_2022aa6.abi3.so +3 -0
build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json +8 -0
build/torch210-cxx11-xpu20253-x86_64-linux/rotary/__init__.py +26 -0
build/torch210-xpu20253-x86_64-windows/__init__.py +52 -0
build/torch210-xpu20253-x86_64-windows/_ops.py +9 -0
build/torch210-xpu20253-x86_64-windows/_rotary_xpu_07a01e5.pyd +3 -0
build/torch210-xpu20253-x86_64-windows/metadata.json +5 -0
build/torch210-xpu20253-x86_64-windows/rotary/__init__.py +26 -0
build/torch211-cxx11-cu126-aarch64-linux/__init__.py +52 -0
build/torch211-cxx11-cu126-aarch64-linux/_ops.py +9 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,48 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cu130-x86_64-windows/rotary/_rotary_a793e44.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-cu128-x86_64-windows/rotary/_rotary_119c830.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-cu128-x86_64-windows/rotary/_rotary_cdcfefe.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch29-xpu20252-x86_64-windows/rotary/_rotary_cdcfefe.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-cu128-x86_64-windows/rotary/_rotary_dec30e1.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch29-xpu20252-x86_64-windows/rotary/_rotary_dec30e1.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-cu128-x86_64-windows/rotary/_rotary_66b961a.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch29-xpu20252-x86_64-windows/rotary/_rotary_66b961a.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-cu128-x86_64-windows/rotary/_rotary_9f63cc2.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-xpu20253-x86_64-windows/rotary/_rotary_9f63cc2.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-cu128-x86_64-windows/_rotary_cuda_07a01e5.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-xpu20253-x86_64-windows/_rotary_xpu_07a01e5.pyd filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+license: bsd-3-clause
+tags:
+  - kernels
+---
+![Status](https://hubwebhook.dholtz.com/shield?repo=kernels-community/rotary)
+## rotary
+rotary embedding kernel from [Flash Attention](https://github.com/Dao-AILab/flash-attention/tree/main/csrc/rotary).
+Kernel source: https://github.com/huggingface/kernels-community/tree/main/rotary

benchmarks/benchmark.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import torch
+from kernels.benchmark import Benchmark
def apply_rotary_reference(
    x1: torch.Tensor, x2: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor, conj: bool
) -> tuple[torch.Tensor, torch.Tensor]:
    """Rotate the pair ``(x1, x2)`` with plain PyTorch arithmetic.

    Serves as the ground truth the kernel output is compared against.

    Args:
        x1: First component of the pair to rotate.
        x2: Second component of the pair to rotate.
        cos: Cosine factors, multiplied elementwise with the inputs.
        sin: Sine factors, multiplied elementwise with the inputs.
        conj: When true, the sign of every ``sin`` term is flipped, i.e. the
            rotation is applied in the opposite direction.

    Returns:
        The rotated ``(out1, out2)`` pair as newly allocated tensors.
    """
    if conj:
        # Conjugate rotation: same formula with ``sin`` negated.
        return x1 * cos + x2 * sin, -x1 * sin + x2 * cos
    return x1 * cos - x2 * sin, x1 * sin + x2 * cos
class RotaryBenchmark(Benchmark):
    """Benchmark ``apply_rotary`` at two problem sizes.

    Each ``setup*`` method allocates the inputs, each ``benchmark_*`` method
    runs the kernel in place on scratch copies of those inputs, and each
    ``verify_*`` method stores the kernel result in ``self.out`` and returns
    the pure-PyTorch reference result.
    """

    seed: int = 42

    def _make_inputs(
        self, batch_size: int, seqlen: int, num_heads: int, rotary_dim: int
    ) -> None:
        """Allocate random float32 inputs, cos/sin tensors and output buffers."""
        x_shape = (batch_size, seqlen, num_heads, rotary_dim)
        freq_shape = (seqlen, 1, rotary_dim)
        options = {"device": self.device, "dtype": torch.float32}

        # The draw order (x1, x2, cos, sin) is kept fixed so that a given
        # seed keeps producing the same tensors.
        # Query tensor split into rotary parts
        self.x1 = torch.randn(*x_shape, **options)
        self.x2 = torch.randn(*x_shape, **options)

        # Rotary position embeddings
        self.cos = torch.randn(*freq_shape, **options)
        self.sin = torch.randn(*freq_shape, **options)

        # Output tensors (in-place, so clone inputs)
        self.out1 = self.x1.clone()
        self.out2 = self.x2.clone()

    def _run_kernel(self) -> None:
        """Run the kernel in place on fresh copies of the inputs."""
        # Reset outputs to input values for in-place operation
        self.out1.copy_(self.x1)
        self.out2.copy_(self.x2)
        self.kernel.apply_rotary(
            self.out1, self.out2, self.cos, self.sin, self.out1, self.out2, False
        )

    def _reference(self) -> torch.Tensor:
        """Store the kernel result in ``self.out`` and return the reference."""
        ref_out1, ref_out2 = apply_rotary_reference(
            self.x1, self.x2, self.cos, self.sin, False
        )
        # Concatenate for comparison (benchmark compares self.out with returned tensor)
        self.out = torch.cat([self.out1, self.out2], dim=-1)
        return torch.cat([ref_out1, ref_out2], dim=-1)

    def setup(self):
        """Allocate the default (small) problem."""
        self._make_inputs(batch_size=2, seqlen=128, num_heads=8, rotary_dim=32)

    def benchmark_base(self):
        """Timed body for the default problem."""
        self._run_kernel()

    def verify_base(self) -> torch.Tensor:
        """Return the reference output for the default problem."""
        return self._reference()

    def setup_large(self):
        """Allocate the large problem."""
        self._make_inputs(batch_size=8, seqlen=512, num_heads=32, rotary_dim=64)

    def benchmark_large(self):
        """Timed body for the large problem."""
        self._run_kernel()

    def verify_large(self) -> torch.Tensor:
        """Return the reference output for the large problem."""
        return self._reference()

build/torch210-cu128-x86_64-windows/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
def apply_rotary(
    x1: torch.Tensor,
    x2: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    out1: torch.Tensor,
    out2: torch.Tensor,
    conj: bool,
) -> None:
    """Invoke the compiled ``apply_rotary`` op.

    All arguments are forwarded unchanged and nothing is returned. The
    wrapper below passes the same tensors as inputs and outputs, so the op
    is expected to write its result into ``out1``/``out2``.

    Args:
        x1: First input tensor of the pair.
        x2: Second input tensor of the pair.
        cos: Cosine tensor.
        sin: Sine tensor.
        out1: Destination for the first output (may be ``x1`` itself).
        out2: Destination for the second output (may be ``x2`` itself).
        conj: Flag forwarded to the op; presumably selects the conjugate
            rotation -- confirm against the kernel source.
    """
    rotary_op = ops.apply_rotary
    rotary_op(x1, x2, cos, sin, out1, out2, conj)
def apply_rotary_transformers(
    q: torch.Tensor,
    k: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    unsqueeze_dim: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Rotary kernel wrapper with the signature of transformers' apply_rotary_pos_emb.

    ``q`` and ``k`` are left untouched: each is cloned, the clone is split in
    two halves along the last dimension, and the kernel rotates those halves
    in place.

    Args:
        q: Query tensor.
        k: Key tensor; split at the same index as ``q``.
        cos: Cosine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        sin: Sine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        unsqueeze_dim: Dimension at which ``cos``/``sin`` get an extra axis.

    Returns:
        The rotated copies of ``q`` and ``k``.
    """
    cos, sin = cos.unsqueeze(unsqueeze_dim), sin.unsqueeze(unsqueeze_dim)

    # Get half dimension for rotation
    half = q.shape[-1] // 2
    if cos.shape[-1] != half:
        # Trim cos/sin to match the half dimension
        cos, sin = cos[..., :half], sin[..., :half]

    rotated = []
    for tensor in (q, k):
        result = tensor.clone()
        # Views into the clone, so the in-place kernel fills ``result``.
        first, second = result[..., :half], result[..., half:]
        apply_rotary(first, second, cos, sin, first, second, False)
        rotated.append(result)

    q_rotated, k_rotated = rotated
    return q_rotated, k_rotated
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch210-cu128-x86_64-windows/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _rotary_cuda_07a01e5
+ops = torch.ops._rotary_cuda_07a01e5
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.
    """
    namespace = "_rotary_cuda_07a01e5"
    return f"{namespace}::{op_name}"

build/torch210-cu128-x86_64-windows/_rotary_cuda_07a01e5.pyd ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd29928a6e2a3930f4c7ec3bcffc37574981cf59bed97e6a8f3c522fa7ca0dda
+size 10415616

build/torch210-cu128-x86_64-windows/metadata.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "version": 1,
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}

build/torch210-cu128-x86_64-windows/rotary/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import ctypes
+import sys
+import importlib
+from pathlib import Path
+from types import ModuleType
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at ``file_path`` as a uniquely named module.

    The module is registered in ``sys.modules`` under a name derived from the
    hash of its absolute path, executed, and returned.

    Args:
        file_path: Location of the Python source file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for
            ``file_path``.
    """
    # `import importlib` alone does not guarantee that the `importlib.util`
    # submodule is loaded, so import it explicitly here.
    import importlib.util

    # We cannot use the module name as-is: once added to `sys.modules` it
    # would also be used for other imports. Use the hex-encoded hash of the
    # absolute path instead so the name is unique to this file.
    module_name = f"{ctypes.c_size_t(hash(file_path.absolute())).value:x}"
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register the module before running its body.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a partially initialised module cached.
        sys.modules.pop(module_name, None)
        raise
    return module
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch210-cxx11-cu126-aarch64-linux/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
def apply_rotary(
    x1: torch.Tensor,
    x2: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    out1: torch.Tensor,
    out2: torch.Tensor,
    conj: bool,
) -> None:
    """Invoke the compiled ``apply_rotary`` op.

    All arguments are forwarded unchanged and nothing is returned. The
    wrapper below passes the same tensors as inputs and outputs, so the op
    is expected to write its result into ``out1``/``out2``.

    Args:
        x1: First input tensor of the pair.
        x2: Second input tensor of the pair.
        cos: Cosine tensor.
        sin: Sine tensor.
        out1: Destination for the first output (may be ``x1`` itself).
        out2: Destination for the second output (may be ``x2`` itself).
        conj: Flag forwarded to the op; presumably selects the conjugate
            rotation -- confirm against the kernel source.
    """
    rotary_op = ops.apply_rotary
    rotary_op(x1, x2, cos, sin, out1, out2, conj)
def apply_rotary_transformers(
    q: torch.Tensor,
    k: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    unsqueeze_dim: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Rotary kernel wrapper with the signature of transformers' apply_rotary_pos_emb.

    ``q`` and ``k`` are left untouched: each is cloned, the clone is split in
    two halves along the last dimension, and the kernel rotates those halves
    in place.

    Args:
        q: Query tensor.
        k: Key tensor; split at the same index as ``q``.
        cos: Cosine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        sin: Sine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        unsqueeze_dim: Dimension at which ``cos``/``sin`` get an extra axis.

    Returns:
        The rotated copies of ``q`` and ``k``.
    """
    cos, sin = cos.unsqueeze(unsqueeze_dim), sin.unsqueeze(unsqueeze_dim)

    # Get half dimension for rotation
    half = q.shape[-1] // 2
    if cos.shape[-1] != half:
        # Trim cos/sin to match the half dimension
        cos, sin = cos[..., :half], sin[..., :half]

    rotated = []
    for tensor in (q, k):
        result = tensor.clone()
        # Views into the clone, so the in-place kernel fills ``result``.
        first, second = result[..., :half], result[..., half:]
        apply_rotary(first, second, cos, sin, first, second, False)
        rotated.append(result)

    q_rotated, k_rotated = rotated
    return q_rotated, k_rotated
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch210-cxx11-cu126-aarch64-linux/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _rotary_cuda_2022aa6
+ops = torch.ops._rotary_cuda_2022aa6
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.
    """
    namespace = "_rotary_cuda_2022aa6"
    return f"{namespace}::{op_name}"

build/torch210-cxx11-cu126-aarch64-linux/_rotary_cuda_2022aa6.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7863cbd6a156cd3f873e926b2f8861e151d43952a26a989b9ad19753aa6270dc
+size 8282888

build/torch210-cxx11-cu126-aarch64-linux/metadata.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "version": 1,
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0+PTX"
+    ]
+  }
+}

build/torch210-cxx11-cu126-aarch64-linux/rotary/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at ``file_path`` as a uniquely named module.

    The module is registered in ``sys.modules`` under a name derived from the
    hash of its absolute path, executed, and returned.

    Args:
        file_path: Location of the Python source file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for
            ``file_path``.
    """
    # We cannot use the module name as-is: once added to `sys.modules` it
    # would also be used for other imports. Use the hex-encoded hash of the
    # absolute path instead so the name is unique to this file.
    module_name = f"{ctypes.c_size_t(hash(file_path.absolute())).value:x}"
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register the module before running its body.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a partially initialised module cached.
        sys.modules.pop(module_name, None)
        raise
    return module
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch210-cxx11-cu126-x86_64-linux/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
def apply_rotary(
    x1: torch.Tensor,
    x2: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    out1: torch.Tensor,
    out2: torch.Tensor,
    conj: bool,
) -> None:
    """Invoke the compiled ``apply_rotary`` op.

    All arguments are forwarded unchanged and nothing is returned. The
    wrapper below passes the same tensors as inputs and outputs, so the op
    is expected to write its result into ``out1``/``out2``.

    Args:
        x1: First input tensor of the pair.
        x2: Second input tensor of the pair.
        cos: Cosine tensor.
        sin: Sine tensor.
        out1: Destination for the first output (may be ``x1`` itself).
        out2: Destination for the second output (may be ``x2`` itself).
        conj: Flag forwarded to the op; presumably selects the conjugate
            rotation -- confirm against the kernel source.
    """
    rotary_op = ops.apply_rotary
    rotary_op(x1, x2, cos, sin, out1, out2, conj)
def apply_rotary_transformers(
    q: torch.Tensor,
    k: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    unsqueeze_dim: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Rotary kernel wrapper with the signature of transformers' apply_rotary_pos_emb.

    ``q`` and ``k`` are left untouched: each is cloned, the clone is split in
    two halves along the last dimension, and the kernel rotates those halves
    in place.

    Args:
        q: Query tensor.
        k: Key tensor; split at the same index as ``q``.
        cos: Cosine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        sin: Sine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        unsqueeze_dim: Dimension at which ``cos``/``sin`` get an extra axis.

    Returns:
        The rotated copies of ``q`` and ``k``.
    """
    cos, sin = cos.unsqueeze(unsqueeze_dim), sin.unsqueeze(unsqueeze_dim)

    # Get half dimension for rotation
    half = q.shape[-1] // 2
    if cos.shape[-1] != half:
        # Trim cos/sin to match the half dimension
        cos, sin = cos[..., :half], sin[..., :half]

    rotated = []
    for tensor in (q, k):
        result = tensor.clone()
        # Views into the clone, so the in-place kernel fills ``result``.
        first, second = result[..., :half], result[..., half:]
        apply_rotary(first, second, cos, sin, first, second, False)
        rotated.append(result)

    q_rotated, k_rotated = rotated
    return q_rotated, k_rotated
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch210-cxx11-cu126-x86_64-linux/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _rotary_cuda_2022aa6
+ops = torch.ops._rotary_cuda_2022aa6
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.
    """
    namespace = "_rotary_cuda_2022aa6"
    return f"{namespace}::{op_name}"

build/torch210-cxx11-cu126-x86_64-linux/_rotary_cuda_2022aa6.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2ac4fb2c7bbe3b277ed069761faabce67d1e1f8b3d5708f2d6f0b8b1ccfa873
+size 8200568

build/torch210-cxx11-cu126-x86_64-linux/metadata.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "version": 1,
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0+PTX"
+    ]
+  }
+}

build/torch210-cxx11-cu126-x86_64-linux/rotary/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at ``file_path`` as a uniquely named module.

    The module is registered in ``sys.modules`` under a name derived from the
    hash of its absolute path, executed, and returned.

    Args:
        file_path: Location of the Python source file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for
            ``file_path``.
    """
    # We cannot use the module name as-is: once added to `sys.modules` it
    # would also be used for other imports. Use the hex-encoded hash of the
    # absolute path instead so the name is unique to this file.
    module_name = f"{ctypes.c_size_t(hash(file_path.absolute())).value:x}"
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register the module before running its body.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a partially initialised module cached.
        sys.modules.pop(module_name, None)
        raise
    return module
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch210-cxx11-cu128-aarch64-linux/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
def apply_rotary(
    x1: torch.Tensor,
    x2: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    out1: torch.Tensor,
    out2: torch.Tensor,
    conj: bool,
) -> None:
    """Invoke the compiled ``apply_rotary`` op.

    All arguments are forwarded unchanged and nothing is returned. The
    wrapper below passes the same tensors as inputs and outputs, so the op
    is expected to write its result into ``out1``/``out2``.

    Args:
        x1: First input tensor of the pair.
        x2: Second input tensor of the pair.
        cos: Cosine tensor.
        sin: Sine tensor.
        out1: Destination for the first output (may be ``x1`` itself).
        out2: Destination for the second output (may be ``x2`` itself).
        conj: Flag forwarded to the op; presumably selects the conjugate
            rotation -- confirm against the kernel source.
    """
    rotary_op = ops.apply_rotary
    rotary_op(x1, x2, cos, sin, out1, out2, conj)
def apply_rotary_transformers(
    q: torch.Tensor,
    k: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    unsqueeze_dim: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Rotary kernel wrapper with the signature of transformers' apply_rotary_pos_emb.

    ``q`` and ``k`` are left untouched: each is cloned, the clone is split in
    two halves along the last dimension, and the kernel rotates those halves
    in place.

    Args:
        q: Query tensor.
        k: Key tensor; split at the same index as ``q``.
        cos: Cosine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        sin: Sine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        unsqueeze_dim: Dimension at which ``cos``/``sin`` get an extra axis.

    Returns:
        The rotated copies of ``q`` and ``k``.
    """
    cos, sin = cos.unsqueeze(unsqueeze_dim), sin.unsqueeze(unsqueeze_dim)

    # Get half dimension for rotation
    half = q.shape[-1] // 2
    if cos.shape[-1] != half:
        # Trim cos/sin to match the half dimension
        cos, sin = cos[..., :half], sin[..., :half]

    rotated = []
    for tensor in (q, k):
        result = tensor.clone()
        # Views into the clone, so the in-place kernel fills ``result``.
        first, second = result[..., :half], result[..., half:]
        apply_rotary(first, second, cos, sin, first, second, False)
        rotated.append(result)

    q_rotated, k_rotated = rotated
    return q_rotated, k_rotated
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch210-cxx11-cu128-aarch64-linux/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _rotary_cuda_2022aa6
+ops = torch.ops._rotary_cuda_2022aa6
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.
    """
    namespace = "_rotary_cuda_2022aa6"
    return f"{namespace}::{op_name}"

build/torch210-cxx11-cu128-aarch64-linux/_rotary_cuda_2022aa6.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:696ff3570b3f6fbc9623e44b53f189bb0be0bc6260d490616b03c58dd5dd2146
+size 12019200

build/torch210-cxx11-cu128-aarch64-linux/metadata.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "version": 1,
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}

build/torch210-cxx11-cu128-aarch64-linux/rotary/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at ``file_path`` as a uniquely named module.

    The module is registered in ``sys.modules`` under a name derived from the
    hash of its absolute path, executed, and returned.

    Args:
        file_path: Location of the Python source file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for
            ``file_path``.
    """
    # We cannot use the module name as-is: once added to `sys.modules` it
    # would also be used for other imports. Use the hex-encoded hash of the
    # absolute path instead so the name is unique to this file.
    module_name = f"{ctypes.c_size_t(hash(file_path.absolute())).value:x}"
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register the module before running its body.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a partially initialised module cached.
        sys.modules.pop(module_name, None)
        raise
    return module
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch210-cxx11-cu128-x86_64-linux/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
def apply_rotary(
    x1: torch.Tensor,
    x2: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    out1: torch.Tensor,
    out2: torch.Tensor,
    conj: bool,
) -> None:
    """Invoke the compiled ``apply_rotary`` op.

    All arguments are forwarded unchanged and nothing is returned. The
    wrapper below passes the same tensors as inputs and outputs, so the op
    is expected to write its result into ``out1``/``out2``.

    Args:
        x1: First input tensor of the pair.
        x2: Second input tensor of the pair.
        cos: Cosine tensor.
        sin: Sine tensor.
        out1: Destination for the first output (may be ``x1`` itself).
        out2: Destination for the second output (may be ``x2`` itself).
        conj: Flag forwarded to the op; presumably selects the conjugate
            rotation -- confirm against the kernel source.
    """
    rotary_op = ops.apply_rotary
    rotary_op(x1, x2, cos, sin, out1, out2, conj)
def apply_rotary_transformers(
    q: torch.Tensor,
    k: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    unsqueeze_dim: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Rotary kernel wrapper with the signature of transformers' apply_rotary_pos_emb.

    ``q`` and ``k`` are left untouched: each is cloned, the clone is split in
    two halves along the last dimension, and the kernel rotates those halves
    in place.

    Args:
        q: Query tensor.
        k: Key tensor; split at the same index as ``q``.
        cos: Cosine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        sin: Sine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        unsqueeze_dim: Dimension at which ``cos``/``sin`` get an extra axis.

    Returns:
        The rotated copies of ``q`` and ``k``.
    """
    cos, sin = cos.unsqueeze(unsqueeze_dim), sin.unsqueeze(unsqueeze_dim)

    # Get half dimension for rotation
    half = q.shape[-1] // 2
    if cos.shape[-1] != half:
        # Trim cos/sin to match the half dimension
        cos, sin = cos[..., :half], sin[..., :half]

    rotated = []
    for tensor in (q, k):
        result = tensor.clone()
        # Views into the clone, so the in-place kernel fills ``result``.
        first, second = result[..., :half], result[..., half:]
        apply_rotary(first, second, cos, sin, first, second, False)
        rotated.append(result)

    q_rotated, k_rotated = rotated
    return q_rotated, k_rotated
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch210-cxx11-cu128-x86_64-linux/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _rotary_cuda_2022aa6
+ops = torch.ops._rotary_cuda_2022aa6
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.
    """
    namespace = "_rotary_cuda_2022aa6"
    return f"{namespace}::{op_name}"

build/torch210-cxx11-cu128-x86_64-linux/_rotary_cuda_2022aa6.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1238e4b57b2f30d5c5f67fc1d64a133de551f9b68b619271ac2a10f948d66b04
+size 11905904

build/torch210-cxx11-cu128-x86_64-linux/metadata.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "version": 1,
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}

build/torch210-cxx11-cu128-x86_64-linux/rotary/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at ``file_path`` as a uniquely named module.

    The module is registered in ``sys.modules`` under a name derived from the
    hash of its absolute path, executed, and returned.

    Args:
        file_path: Location of the Python source file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for
            ``file_path``.
    """
    # We cannot use the module name as-is: once added to `sys.modules` it
    # would also be used for other imports. Use the hex-encoded hash of the
    # absolute path instead so the name is unique to this file.
    module_name = f"{ctypes.c_size_t(hash(file_path.absolute())).value:x}"
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register the module before running its body.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a partially initialised module cached.
        sys.modules.pop(module_name, None)
        raise
    return module
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch210-cxx11-cu130-aarch64-linux/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
def apply_rotary(
    x1: torch.Tensor,
    x2: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    out1: torch.Tensor,
    out2: torch.Tensor,
    conj: bool,
) -> None:
    """Invoke the compiled ``apply_rotary`` op.

    All arguments are forwarded unchanged and nothing is returned. The
    wrapper below passes the same tensors as inputs and outputs, so the op
    is expected to write its result into ``out1``/``out2``.

    Args:
        x1: First input tensor of the pair.
        x2: Second input tensor of the pair.
        cos: Cosine tensor.
        sin: Sine tensor.
        out1: Destination for the first output (may be ``x1`` itself).
        out2: Destination for the second output (may be ``x2`` itself).
        conj: Flag forwarded to the op; presumably selects the conjugate
            rotation -- confirm against the kernel source.
    """
    rotary_op = ops.apply_rotary
    rotary_op(x1, x2, cos, sin, out1, out2, conj)
def apply_rotary_transformers(
    q: torch.Tensor,
    k: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    unsqueeze_dim: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Rotary kernel wrapper with the signature of transformers' apply_rotary_pos_emb.

    ``q`` and ``k`` are left untouched: each is cloned, the clone is split in
    two halves along the last dimension, and the kernel rotates those halves
    in place.

    Args:
        q: Query tensor.
        k: Key tensor; split at the same index as ``q``.
        cos: Cosine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        sin: Sine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        unsqueeze_dim: Dimension at which ``cos``/``sin`` get an extra axis.

    Returns:
        The rotated copies of ``q`` and ``k``.
    """
    cos, sin = cos.unsqueeze(unsqueeze_dim), sin.unsqueeze(unsqueeze_dim)

    # Get half dimension for rotation
    half = q.shape[-1] // 2
    if cos.shape[-1] != half:
        # Trim cos/sin to match the half dimension
        cos, sin = cos[..., :half], sin[..., :half]

    rotated = []
    for tensor in (q, k):
        result = tensor.clone()
        # Views into the clone, so the in-place kernel fills ``result``.
        first, second = result[..., :half], result[..., half:]
        apply_rotary(first, second, cos, sin, first, second, False)
        rotated.append(result)

    q_rotated, k_rotated = rotated
    return q_rotated, k_rotated
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch210-cxx11-cu130-aarch64-linux/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _rotary_cuda_2022aa6
+ops = torch.ops._rotary_cuda_2022aa6
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.
    """
    namespace = "_rotary_cuda_2022aa6"
    return f"{namespace}::{op_name}"

build/torch210-cxx11-cu130-aarch64-linux/_rotary_cuda_2022aa6.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:686edb81b5ffdc43e88e35995b962aed5d23061c6aa27aff61af910b76cf03bf
+size 10411432

build/torch210-cxx11-cu130-aarch64-linux/metadata.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "version": 1,
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "11.0",
+      "12.0+PTX",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}

build/torch210-cxx11-cu130-aarch64-linux/rotary/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at ``file_path`` as a uniquely named module.

    The module is registered in ``sys.modules`` under a name derived from the
    hash of its absolute path, executed, and returned.

    Args:
        file_path: Location of the Python source file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for
            ``file_path``.
    """
    # We cannot use the module name as-is: once added to `sys.modules` it
    # would also be used for other imports. Use the hex-encoded hash of the
    # absolute path instead so the name is unique to this file.
    module_name = f"{ctypes.c_size_t(hash(file_path.absolute())).value:x}"
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register the module before running its body.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a partially initialised module cached.
        sys.modules.pop(module_name, None)
        raise
    return module
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch210-cxx11-cu130-x86_64-linux/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
def apply_rotary(
    x1: torch.Tensor,
    x2: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    out1: torch.Tensor,
    out2: torch.Tensor,
    conj: bool,
) -> None:
    """Invoke the compiled ``apply_rotary`` op.

    All arguments are forwarded unchanged and nothing is returned. The
    wrapper below passes the same tensors as inputs and outputs, so the op
    is expected to write its result into ``out1``/``out2``.

    Args:
        x1: First input tensor of the pair.
        x2: Second input tensor of the pair.
        cos: Cosine tensor.
        sin: Sine tensor.
        out1: Destination for the first output (may be ``x1`` itself).
        out2: Destination for the second output (may be ``x2`` itself).
        conj: Flag forwarded to the op; presumably selects the conjugate
            rotation -- confirm against the kernel source.
    """
    rotary_op = ops.apply_rotary
    rotary_op(x1, x2, cos, sin, out1, out2, conj)
def apply_rotary_transformers(
    q: torch.Tensor,
    k: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    unsqueeze_dim: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Rotary kernel wrapper with the signature of transformers' apply_rotary_pos_emb.

    ``q`` and ``k`` are left untouched: each is cloned, the clone is split in
    two halves along the last dimension, and the kernel rotates those halves
    in place.

    Args:
        q: Query tensor.
        k: Key tensor; split at the same index as ``q``.
        cos: Cosine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        sin: Sine tensor, unsqueezed at ``unsqueeze_dim`` before use.
        unsqueeze_dim: Dimension at which ``cos``/``sin`` get an extra axis.

    Returns:
        The rotated copies of ``q`` and ``k``.
    """
    cos, sin = cos.unsqueeze(unsqueeze_dim), sin.unsqueeze(unsqueeze_dim)

    # Get half dimension for rotation
    half = q.shape[-1] // 2
    if cos.shape[-1] != half:
        # Trim cos/sin to match the half dimension
        cos, sin = cos[..., :half], sin[..., :half]

    rotated = []
    for tensor in (q, k):
        result = tensor.clone()
        # Views into the clone, so the in-place kernel fills ``result``.
        first, second = result[..., :half], result[..., half:]
        apply_rotary(first, second, cos, sin, first, second, False)
        rotated.append(result)

    q_rotated, k_rotated = rotated
    return q_rotated, k_rotated
+# Names exported by a star-import of this module.
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch210-cxx11-cu130-x86_64-linux/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _rotary_cuda_2022aa6
+# Handle to the ``torch.ops`` namespace that shares its name with the
+# extension module imported above.
+ops = torch.ops._rotary_cuda_2022aa6
+def add_op_namespace_prefix(op_name: str):
+    """
+    Return ``op_name`` qualified with this build's op namespace,
+    in the form ``<namespace>::<op_name>``.
+    """
+    qualified_name = f"_rotary_cuda_2022aa6::{op_name}"
+    return qualified_name

build/torch210-cxx11-cu130-x86_64-linux/_rotary_cuda_2022aa6.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:069004af51893d2f112d58bc00197cf813c5271ef6f9105936b7966bbb44881f
+size 10310752

build/torch210-cxx11-cu130-x86_64-linux/metadata.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "version": 1,
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "11.0",
+      "12.0+PTX",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}

build/torch210-cxx11-cu130-x86_64-linux/rotary/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Load the Python file at ``file_path`` as a module and return it.
+
+    The module is registered in ``sys.modules`` under a name derived from the
+    hex-encoded hash of the absolute path rather than a regular module name,
+    so that the name is tied to the path and is not picked up by other
+    imports.
+
+    Raises:
+        ImportError: if no spec (or no module) can be created for the file.
+    """
+    # Reinterpret the (possibly negative) hash as an unsigned size_t so the
+    # hex rendering carries no sign.
+    unsigned_hash = ctypes.c_size_t(hash(file_path.absolute())).value
+    module_name = f"{unsigned_hash:x}"
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    loaded = importlib.util.module_from_spec(spec)
+    if loaded is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    # Register first, then execute the module body.
+    sys.modules[module_name] = loaded
+    spec.loader.exec_module(loaded)  # type: ignore
+    return loaded
+# Load the ``__init__.py`` located one directory above this file and copy all
+# of its module-level names into this module's namespace.
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch210-cxx11-xpu20253-x86_64-linux/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
+def apply_rotary(
+    x1: torch.Tensor,
+    x2: torch.Tensor,
+    cos: torch.Tensor,
+    sin: torch.Tensor,
+    out1: torch.Tensor,
+    out2: torch.Tensor,
+    conj: bool,
+) -> None:
+    """Forward the given tensors and ``conj`` flag to the ``apply_rotary`` op.
+
+    All arguments are passed through positionally and unchanged; nothing is
+    returned. NOTE(review): the kernel presumably writes its results into
+    ``out1`` and ``out2`` -- confirm against the op implementation.
+    """
+    kernel_args = (x1, x2, cos, sin, out1, out2, conj)
+    ops.apply_rotary(*kernel_args)
+def apply_rotary_transformers(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    cos: torch.Tensor,
+    sin: torch.Tensor,
+    unsqueeze_dim: int = 1,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Wrapper exposing the rotary kernel through a signature matching
+    transformers' apply_rotary_pos_emb.
+
+    ``q`` and ``k`` are cloned, so the inputs themselves are not written to.
+    Each clone is split along its last dimension at ``q.shape[-1] // 2`` (the
+    split point is taken from ``q`` for both tensors) and the two halves are
+    handed to ``apply_rotary`` as both inputs and outputs. ``cos`` and ``sin``
+    get an extra axis at ``unsqueeze_dim`` and are cut down to the first
+    ``half_dim`` entries of their last dimension when the sizes differ.
+
+    Returns the pair ``(q_rotated, k_rotated)`` of clones.
+    """
+    cos = cos.unsqueeze(unsqueeze_dim)
+    sin = sin.unsqueeze(unsqueeze_dim)
+    rotated_pair = (q.clone(), k.clone())
+    # Split point for the rotation, derived from q's last dimension
+    half_dim = q.shape[-1] // 2
+    if cos.shape[-1] != half_dim:
+        # Keep only the leading half_dim entries of cos/sin
+        cos, sin = cos[..., :half_dim], sin[..., :half_dim]
+    for rotated in rotated_pair:
+        lower = rotated[..., :half_dim]
+        upper = rotated[..., half_dim:]
+        # The halves are views into the clone and serve as inputs and outputs.
+        apply_rotary(lower, upper, cos, sin, lower, upper, False)
+    return rotated_pair
+# Names exported by a star-import of this module.
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _rotary_xpu_2022aa6
+# Handle to the ``torch.ops`` namespace that shares its name with the
+# extension module imported above.
+ops = torch.ops._rotary_xpu_2022aa6
+def add_op_namespace_prefix(op_name: str):
+    """
+    Return ``op_name`` qualified with this build's op namespace,
+    in the form ``<namespace>::<op_name>``.
+    """
+    qualified_name = f"_rotary_xpu_2022aa6::{op_name}"
+    return qualified_name

build/torch210-cxx11-xpu20253-x86_64-linux/_rotary_xpu_2022aa6.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26ce5dd015655bbbccf535f2b7078b184d01831778effd3058fa24256be69111
+size 2301504

build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "version": 1,
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "xpu"
+  }
+}

build/torch210-cxx11-xpu20253-x86_64-linux/rotary/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Load the Python file at ``file_path`` as a module and return it.
+
+    The module is registered in ``sys.modules`` under a name derived from the
+    hex-encoded hash of the absolute path rather than a regular module name,
+    so that the name is tied to the path and is not picked up by other
+    imports.
+
+    Raises:
+        ImportError: if no spec (or no module) can be created for the file.
+    """
+    # Reinterpret the (possibly negative) hash as an unsigned size_t so the
+    # hex rendering carries no sign.
+    unsigned_hash = ctypes.c_size_t(hash(file_path.absolute())).value
+    module_name = f"{unsigned_hash:x}"
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    loaded = importlib.util.module_from_spec(spec)
+    if loaded is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    # Register first, then execute the module body.
+    sys.modules[module_name] = loaded
+    spec.loader.exec_module(loaded)  # type: ignore
+    return loaded
+# Load the ``__init__.py`` located one directory above this file and copy all
+# of its module-level names into this module's namespace.
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch210-xpu20253-x86_64-windows/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
+def apply_rotary(
+    x1: torch.Tensor,
+    x2: torch.Tensor,
+    cos: torch.Tensor,
+    sin: torch.Tensor,
+    out1: torch.Tensor,
+    out2: torch.Tensor,
+    conj: bool,
+) -> None:
+    """Forward the given tensors and ``conj`` flag to the ``apply_rotary`` op.
+
+    All arguments are passed through positionally and unchanged; nothing is
+    returned. NOTE(review): the kernel presumably writes its results into
+    ``out1`` and ``out2`` -- confirm against the op implementation.
+    """
+    kernel_args = (x1, x2, cos, sin, out1, out2, conj)
+    ops.apply_rotary(*kernel_args)
+def apply_rotary_transformers(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    cos: torch.Tensor,
+    sin: torch.Tensor,
+    unsqueeze_dim: int = 1,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Wrapper exposing the rotary kernel through a signature matching
+    transformers' apply_rotary_pos_emb.
+
+    ``q`` and ``k`` are cloned, so the inputs themselves are not written to.
+    Each clone is split along its last dimension at ``q.shape[-1] // 2`` (the
+    split point is taken from ``q`` for both tensors) and the two halves are
+    handed to ``apply_rotary`` as both inputs and outputs. ``cos`` and ``sin``
+    get an extra axis at ``unsqueeze_dim`` and are cut down to the first
+    ``half_dim`` entries of their last dimension when the sizes differ.
+
+    Returns the pair ``(q_rotated, k_rotated)`` of clones.
+    """
+    cos = cos.unsqueeze(unsqueeze_dim)
+    sin = sin.unsqueeze(unsqueeze_dim)
+    rotated_pair = (q.clone(), k.clone())
+    # Split point for the rotation, derived from q's last dimension
+    half_dim = q.shape[-1] // 2
+    if cos.shape[-1] != half_dim:
+        # Keep only the leading half_dim entries of cos/sin
+        cos, sin = cos[..., :half_dim], sin[..., :half_dim]
+    for rotated in rotated_pair:
+        lower = rotated[..., :half_dim]
+        upper = rotated[..., half_dim:]
+        # The halves are views into the clone and serve as inputs and outputs.
+        apply_rotary(lower, upper, cos, sin, lower, upper, False)
+    return rotated_pair
+# Names exported by a star-import of this module.
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch210-xpu20253-x86_64-windows/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _rotary_xpu_07a01e5
+# Handle to the ``torch.ops`` namespace that shares its name with the
+# extension module imported above.
+ops = torch.ops._rotary_xpu_07a01e5
+def add_op_namespace_prefix(op_name: str):
+    """
+    Return ``op_name`` qualified with this build's op namespace,
+    in the form ``<namespace>::<op_name>``.
+    """
+    qualified_name = f"_rotary_xpu_07a01e5::{op_name}"
+    return qualified_name

build/torch210-xpu20253-x86_64-windows/_rotary_xpu_07a01e5.pyd ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:02d857f2afd55cccc36d439f348ff360bdc7274c0e65660e41a2f8775526dec1
+size 396288

build/torch210-xpu20253-x86_64-windows/metadata.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "version": 1,
+  "license": "BSD-3-Clause",
+  "python-depends": []
+}

build/torch210-xpu20253-x86_64-windows/rotary/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import ctypes
+import sys
+import importlib
+from pathlib import Path
+from types import ModuleType
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Load the Python file at ``file_path`` as a module and return it.
+
+    The module is registered in ``sys.modules`` under the hex-encoded hash of
+    the absolute path, so the name is tied to the path and is not picked up
+    by other imports.
+
+    Raises:
+        ImportError: if no spec (or no module) can be created for the file.
+    """
+    # This file's header only does ``import importlib``, which does not
+    # guarantee that the ``importlib.util`` submodule is loaded and bound.
+    # Import it explicitly so the attribute accesses below cannot fail with
+    # AttributeError.
+    import importlib.util
+
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    # Register before executing the module body.
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+# Load the ``__init__.py`` located one directory above this file and copy all
+# of its module-level names into this module's namespace.
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))

build/torch211-cxx11-cu126-aarch64-linux/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
+def apply_rotary(
+    x1: torch.Tensor,
+    x2: torch.Tensor,
+    cos: torch.Tensor,
+    sin: torch.Tensor,
+    out1: torch.Tensor,
+    out2: torch.Tensor,
+    conj: bool,
+) -> None:
+    """Forward the given tensors and ``conj`` flag to the ``apply_rotary`` op.
+
+    All arguments are passed through positionally and unchanged; nothing is
+    returned. NOTE(review): the kernel presumably writes its results into
+    ``out1`` and ``out2`` -- confirm against the op implementation.
+    """
+    kernel_args = (x1, x2, cos, sin, out1, out2, conj)
+    ops.apply_rotary(*kernel_args)
+def apply_rotary_transformers(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    cos: torch.Tensor,
+    sin: torch.Tensor,
+    unsqueeze_dim: int = 1,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Wrapper exposing the rotary kernel through a signature matching
+    transformers' apply_rotary_pos_emb.
+
+    ``q`` and ``k`` are cloned, so the inputs themselves are not written to.
+    Each clone is split along its last dimension at ``q.shape[-1] // 2`` (the
+    split point is taken from ``q`` for both tensors) and the two halves are
+    handed to ``apply_rotary`` as both inputs and outputs. ``cos`` and ``sin``
+    get an extra axis at ``unsqueeze_dim`` and are cut down to the first
+    ``half_dim`` entries of their last dimension when the sizes differ.
+
+    Returns the pair ``(q_rotated, k_rotated)`` of clones.
+    """
+    cos = cos.unsqueeze(unsqueeze_dim)
+    sin = sin.unsqueeze(unsqueeze_dim)
+    rotated_pair = (q.clone(), k.clone())
+    # Split point for the rotation, derived from q's last dimension
+    half_dim = q.shape[-1] // 2
+    if cos.shape[-1] != half_dim:
+        # Keep only the leading half_dim entries of cos/sin
+        cos, sin = cos[..., :half_dim], sin[..., :half_dim]
+    for rotated in rotated_pair:
+        lower = rotated[..., :half_dim]
+        upper = rotated[..., half_dim:]
+        # The halves are views into the clone and serve as inputs and outputs.
+        apply_rotary(lower, upper, cos, sin, lower, upper, False)
+    return rotated_pair
+# Names exported by a star-import of this module.
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch211-cxx11-cu126-aarch64-linux/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _rotary_cuda_2022aa6
+# Handle to the ``torch.ops`` namespace that shares its name with the
+# extension module imported above.
+ops = torch.ops._rotary_cuda_2022aa6
+def add_op_namespace_prefix(op_name: str):
+    """
+    Return ``op_name`` qualified with this build's op namespace,
+    in the form ``<namespace>::<op_name>``.
+    """
+    qualified_name = f"_rotary_cuda_2022aa6::{op_name}"
+    return qualified_name