File size: 1,536 Bytes
e8e4be6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c5e7b7
 
 
 
 
 
 
 
 
e8e4be6
0c5e7b7
 
e8e4be6
 
0c5e7b7
e8e4be6
 
 
 
 
 
0c5e7b7
 
 
e8e4be6
 
 
 
0c5e7b7
 
 
e8e4be6
 
 
 
 
 
0c5e7b7
 
 
e8e4be6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "numpy",
#     "torch==2.8.0",
#     "kernels-benchmark-tools",
# ]
#
# [tool.uv.sources]
# kernels-benchmark-tools = { path = "../../../../../tools", editable = true }
# ///
import torch
import sys
from kernels_benchmark_tools import KernelTypeEnum, run_benchmark


def apply_rotary_torch(x1, x2, cos, sin, conj=False):
    """Rotate the pair ``(x1, x2)`` using the given cosine/sine factors.

    Args:
        x1: First component of each pair to rotate.
        x2: Second component of each pair to rotate.
        cos: Cosine factors, multiplied with ``x1`` and ``x2``.
        sin: Sine factors, multiplied with ``x1`` and ``x2``.
        conj: When truthy, apply the conjugate rotation (the sign of every
            sine term is flipped) instead of the forward one.

    Returns:
        A ``(first, second)`` tuple holding the rotated components. The
        results are built with out-of-place arithmetic only.
    """
    if conj:
        # Conjugate rotation: same cosine terms, negated sine terms.
        return x1 * cos + x2 * sin, -x1 * sin + x2 * cos

    # Forward rotation.
    return x1 * cos - x2 * sin, x1 * sin + x2 * cos


def torch_rotary(query, key, cos, sin, conj=False):
    """Apply the rotary transform to copies of ``query`` and ``key``.

    The rotary width is taken from ``cos.shape[-1]``. Along the last
    dimension of each input, the first ``width`` entries and the following
    ``width`` entries are rotated against each other via
    ``apply_rotary_torch``; any entries past ``2 * width`` are carried over
    unchanged from the clone.

    Args:
        query: Tensor to rotate; it is cloned, never written to.
        key: Tensor to rotate; it is cloned, never written to.
        cos: Cosine factors; the size of its last dimension sets the width.
        sin: Sine factors, forwarded to ``apply_rotary_torch``.
        conj: Forwarded to ``apply_rotary_torch`` to select the conjugate
            rotation.

    Returns:
        A ``(rotated_query, rotated_key)`` tuple of new tensors.
    """
    width = cos.shape[-1]
    lower = slice(None, width)
    upper = slice(width, 2 * width)

    def _rotated_copy(tensor):
        # Work on a clone so the caller's tensor is left intact.
        result = tensor.clone()
        # Both halves are computed into fresh tensors before either slice is
        # overwritten, so the in-place writes cannot feed back into the math.
        new_lower, new_upper = apply_rotary_torch(
            result[..., lower], result[..., upper], cos, sin, conj
        )
        result[..., lower] = new_lower
        result[..., upper] = new_upper
        return result

    return _rotated_copy(query), _rotated_copy(key)


# Module-level entry point: executing this script hands ``torch_rotary`` to
# ``run_benchmark`` as the "torch_eager" implementation for the ROTARY kernel
# type, tagged with its family and backend.
# NOTE(review): there is no ``__main__`` guard, so importing this file also
# triggers the call — presumably intentional for a uv-run script; confirm.
run_benchmark(
    kernel_type=KernelTypeEnum.ROTARY,
    impl_name="torch_eager",
    impl_tags={"family": "pytorch", "backend": "eager"},
    impl_func=torch_rotary,
)