Kernels
kernels-bot committed on
Commit
085e784
·
verified ·
1 Parent(s): 2f9e1c3

Uploaded using `kernel-builder`.

Browse files
build/torch210-cxx11-xpu20253-x86_64-linux/{_flash_attn2_xpu_85c21a0.abi3.so → _flash_attn2_xpu_042c80b.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a9d3044a02aff0b2f3c240b62c7eb54124878c480717102384dd89090e77ae
3
- size 17713168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2479c3e9bc4ed00bcf409271d3de218c673b3659628564a1ab7ff5eaebea15da
3
+ size 37243424
build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flash_attn2_xpu_85c21a0
3
- ops = torch.ops._flash_attn2_xpu_85c21a0
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flash_attn2_xpu_85c21a0::{op_name}"
 
1
  import torch
2
+ from . import _flash_attn2_xpu_042c80b
3
+ ops = torch.ops._flash_attn2_xpu_042c80b
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flash_attn2_xpu_042c80b::{op_name}"
build/torch210-cxx11-xpu20253-x86_64-linux/flash_attn_interface.py CHANGED
@@ -36,7 +36,14 @@ def _get_block_size_n(device, head_dim, is_dropout, is_causal):
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
- return 64
 
 
 
 
 
 
 
40
 
41
  # This should match the block sizes in the CUDA kernel
42
  major, minor = torch.cuda.get_device_capability(device)
 
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
+ if head_dim <= 96:
40
+ return 64
41
+ elif head_dim <= 128:
42
+ return 32
43
+ elif head_dim <= 256:
44
+ return 64
45
+ else:
46
+ return 32
47
 
48
  # This should match the block sizes in the CUDA kernel
49
  major, minor = torch.cuda.get_device_capability(device)
build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flash-attn2",
3
- "id": "_flash_attn2_xpu_85c21a0",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],
 
1
  {
2
  "name": "flash-attn2",
3
+ "id": "_flash_attn2_xpu_042c80b",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],
build/torch211-cxx11-xpu20253-x86_64-linux/{_flash_attn2_xpu_85c21a0.abi3.so → _flash_attn2_xpu_042c80b.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54c1c44285ae6adbd98d2eeaaa1f475dd74170c5d5676091ac26bf01091a0da5
3
- size 17713168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b19f74934e7944b64651b4cc2ba6fac817d8a3f64074b50bd0cab50a57e847
3
+ size 37243424
build/torch211-cxx11-xpu20253-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flash_attn2_xpu_85c21a0
3
- ops = torch.ops._flash_attn2_xpu_85c21a0
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flash_attn2_xpu_85c21a0::{op_name}"
 
1
  import torch
2
+ from . import _flash_attn2_xpu_042c80b
3
+ ops = torch.ops._flash_attn2_xpu_042c80b
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flash_attn2_xpu_042c80b::{op_name}"
build/torch211-cxx11-xpu20253-x86_64-linux/flash_attn_interface.py CHANGED
@@ -36,7 +36,14 @@ def _get_block_size_n(device, head_dim, is_dropout, is_causal):
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
- return 64
 
 
 
 
 
 
 
40
 
41
  # This should match the block sizes in the CUDA kernel
42
  major, minor = torch.cuda.get_device_capability(device)
 
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
+ if head_dim <= 96:
40
+ return 64
41
+ elif head_dim <= 128:
42
+ return 32
43
+ elif head_dim <= 256:
44
+ return 64
45
+ else:
46
+ return 32
47
 
48
  # This should match the block sizes in the CUDA kernel
49
  major, minor = torch.cuda.get_device_capability(device)
build/torch211-cxx11-xpu20253-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flash-attn2",
3
- "id": "_flash_attn2_xpu_85c21a0",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],
 
1
  {
2
  "name": "flash-attn2",
3
+ "id": "_flash_attn2_xpu_042c80b",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],
build/torch212-cxx11-xpu20253-x86_64-linux/{_flash_attn2_xpu_85c21a0.abi3.so → _flash_attn2_xpu_042c80b.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d32013ee9eba7775b2ce44c94d6059798f6c56cf7e9b32b5c7f7684aaedd99ff
3
- size 17672144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cf9922ba471f172c427768886103c757505c66315fda421d3e490d2bf65fccb
3
+ size 37198312
build/torch212-cxx11-xpu20253-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flash_attn2_xpu_85c21a0
3
- ops = torch.ops._flash_attn2_xpu_85c21a0
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flash_attn2_xpu_85c21a0::{op_name}"
 
1
  import torch
2
+ from . import _flash_attn2_xpu_042c80b
3
+ ops = torch.ops._flash_attn2_xpu_042c80b
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flash_attn2_xpu_042c80b::{op_name}"
build/torch212-cxx11-xpu20253-x86_64-linux/flash_attn_interface.py CHANGED
@@ -36,7 +36,14 @@ def _get_block_size_n(device, head_dim, is_dropout, is_causal):
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
- return 64
 
 
 
 
 
 
 
40
 
41
  # This should match the block sizes in the CUDA kernel
42
  major, minor = torch.cuda.get_device_capability(device)
 
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
+ if head_dim <= 96:
40
+ return 64
41
+ elif head_dim <= 128:
42
+ return 32
43
+ elif head_dim <= 256:
44
+ return 64
45
+ else:
46
+ return 32
47
 
48
  # This should match the block sizes in the CUDA kernel
49
  major, minor = torch.cuda.get_device_capability(device)
build/torch212-cxx11-xpu20253-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flash-attn2",
3
- "id": "_flash_attn2_xpu_85c21a0",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],
 
1
  {
2
  "name": "flash-attn2",
3
+ "id": "_flash_attn2_xpu_042c80b",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],