Kernels
kernels-bot committed on
Commit
1ca86f6
·
verified ·
1 Parent(s): d2aa29a

Uploaded using `kernel-builder`.

Browse files
build/torch210-cxx11-cpu-x86_64-linux/{_flash_attn2_cpu_85c21a0.abi3.so → _flash_attn2_cpu_042c80b.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adbb0be576fc0c6f55a884925959b5064e1c4d2b2969b71ea14b12d59f5ead5a
3
  size 1942240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c84eb7a0c9bdf71b230c454cc95bebdbb85f3ba6d5bcc6225bcae299acbfef5
3
  size 1942240
build/torch210-cxx11-cpu-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flash_attn2_cpu_85c21a0
3
- ops = torch.ops._flash_attn2_cpu_85c21a0
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flash_attn2_cpu_85c21a0::{op_name}"
 
1
  import torch
2
+ from . import _flash_attn2_cpu_042c80b
3
+ ops = torch.ops._flash_attn2_cpu_042c80b
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flash_attn2_cpu_042c80b::{op_name}"
build/torch210-cxx11-cpu-x86_64-linux/flash_attn_interface.py CHANGED
@@ -36,7 +36,14 @@ def _get_block_size_n(device, head_dim, is_dropout, is_causal):
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
- return 64
 
 
 
 
 
 
 
40
 
41
  # This should match the block sizes in the CUDA kernel
42
  major, minor = torch.cuda.get_device_capability(device)
 
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
+ if head_dim <= 96:
40
+ return 64
41
+ elif head_dim <= 128:
42
+ return 32
43
+ elif head_dim <= 256:
44
+ return 64
45
+ else:
46
+ return 32
47
 
48
  # This should match the block sizes in the CUDA kernel
49
  major, minor = torch.cuda.get_device_capability(device)
build/torch210-cxx11-cpu-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flash-attn2",
3
- "id": "_flash_attn2_cpu_85c21a0",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],
 
1
  {
2
  "name": "flash-attn2",
3
+ "id": "_flash_attn2_cpu_042c80b",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],
build/torch211-cxx11-cpu-x86_64-linux/{_flash_attn2_cpu_85c21a0.abi3.so → _flash_attn2_cpu_042c80b.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcd2e125a7a3a2deafeaa924e01c39305d5cd87707df7edfcd7e6229fe49c68d
3
  size 1942240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:149707762062fc9ad367d0395126db2bee01fdf8cc1b9f2677e147529dfc5734
3
  size 1942240
build/torch211-cxx11-cpu-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flash_attn2_cpu_85c21a0
3
- ops = torch.ops._flash_attn2_cpu_85c21a0
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flash_attn2_cpu_85c21a0::{op_name}"
 
1
  import torch
2
+ from . import _flash_attn2_cpu_042c80b
3
+ ops = torch.ops._flash_attn2_cpu_042c80b
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flash_attn2_cpu_042c80b::{op_name}"
build/torch211-cxx11-cpu-x86_64-linux/flash_attn_interface.py CHANGED
@@ -36,7 +36,14 @@ def _get_block_size_n(device, head_dim, is_dropout, is_causal):
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
- return 64
 
 
 
 
 
 
 
40
 
41
  # This should match the block sizes in the CUDA kernel
42
  major, minor = torch.cuda.get_device_capability(device)
 
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
+ if head_dim <= 96:
40
+ return 64
41
+ elif head_dim <= 128:
42
+ return 32
43
+ elif head_dim <= 256:
44
+ return 64
45
+ else:
46
+ return 32
47
 
48
  # This should match the block sizes in the CUDA kernel
49
  major, minor = torch.cuda.get_device_capability(device)
build/torch211-cxx11-cpu-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flash-attn2",
3
- "id": "_flash_attn2_cpu_85c21a0",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],
 
1
  {
2
  "name": "flash-attn2",
3
+ "id": "_flash_attn2_cpu_042c80b",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],
build/torch212-cxx11-cpu-x86_64-linux/{_flash_attn2_cpu_85c21a0.abi3.so → _flash_attn2_cpu_042c80b.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d087d91e9fdb319aac66e3475d7c5456e380ab8acdd66fed2d5e41602994d5ea
3
  size 1942272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5939950002f1e383de6d52c61b7b8c79e563b56398038c408a262160f9a36130
3
  size 1942272
build/torch212-cxx11-cpu-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flash_attn2_cpu_85c21a0
3
- ops = torch.ops._flash_attn2_cpu_85c21a0
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flash_attn2_cpu_85c21a0::{op_name}"
 
1
  import torch
2
+ from . import _flash_attn2_cpu_042c80b
3
+ ops = torch.ops._flash_attn2_cpu_042c80b
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flash_attn2_cpu_042c80b::{op_name}"
build/torch212-cxx11-cpu-x86_64-linux/flash_attn_interface.py CHANGED
@@ -36,7 +36,14 @@ def _get_block_size_n(device, head_dim, is_dropout, is_causal):
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
- return 64
 
 
 
 
 
 
 
40
 
41
  # This should match the block sizes in the CUDA kernel
42
  major, minor = torch.cuda.get_device_capability(device)
 
36
  assert head_dim <= 256
37
 
38
  if device.type == "xpu":
39
+ if head_dim <= 96:
40
+ return 64
41
+ elif head_dim <= 128:
42
+ return 32
43
+ elif head_dim <= 256:
44
+ return 64
45
+ else:
46
+ return 32
47
 
48
  # This should match the block sizes in the CUDA kernel
49
  major, minor = torch.cuda.get_device_capability(device)
build/torch212-cxx11-cpu-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flash-attn2",
3
- "id": "_flash_attn2_cpu_85c21a0",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],
 
1
  {
2
  "name": "flash-attn2",
3
+ "id": "_flash_attn2_cpu_042c80b",
4
  "version": 1,
5
  "license": "BSD-3-Clause",
6
  "python-depends": [],