Uploaded using `kernel-builder`.

Files changed (12)

build/torch210-cxx11-xpu20253-x86_64-linux/{_flash_attn2_xpu_85c21a0.abi3.so → _flash_attn2_xpu_042c80b.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15a9d3044a02aff0b2f3c240b62c7eb54124878c480717102384dd89090e77ae
-size 17713168

 version https://git-lfs.github.com/spec/v1
+oid sha256:2479c3e9bc4ed00bcf409271d3de218c673b3659628564a1ab7ff5eaebea15da
+size 37243424

build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_xpu_85c21a0
-ops = torch.ops._flash_attn2_xpu_85c21a0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_xpu_85c21a0::{op_name}"

 import torch
+from . import _flash_attn2_xpu_042c80b
+ops = torch.ops._flash_attn2_xpu_042c80b
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flash_attn2_xpu_042c80b::{op_name}"

build/torch210-cxx11-xpu20253-x86_64-linux/flash_attn_interface.py CHANGED Viewed

@@ -36,7 +36,14 @@ def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     assert head_dim <= 256
     if device.type == "xpu":
-        return 64
     # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)

     assert head_dim <= 256
     if device.type == "xpu":
+        if head_dim <= 96:
+            return 64
+        elif head_dim <= 128:
+            return 32
+        elif head_dim <= 256:
+            return 64
+        else:
+            return 32
     # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)

build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flash-attn2",
-  "id": "_flash_attn2_xpu_85c21a0",
   "version": 1,
   "license": "BSD-3-Clause",
   "python-depends": [],

 {
   "name": "flash-attn2",
+  "id": "_flash_attn2_xpu_042c80b",
   "version": 1,
   "license": "BSD-3-Clause",
   "python-depends": [],

build/torch211-cxx11-xpu20253-x86_64-linux/{_flash_attn2_xpu_85c21a0.abi3.so → _flash_attn2_xpu_042c80b.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54c1c44285ae6adbd98d2eeaaa1f475dd74170c5d5676091ac26bf01091a0da5
-size 17713168

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3b19f74934e7944b64651b4cc2ba6fac817d8a3f64074b50bd0cab50a57e847
+size 37243424

build/torch211-cxx11-xpu20253-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_xpu_85c21a0
-ops = torch.ops._flash_attn2_xpu_85c21a0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_xpu_85c21a0::{op_name}"

 import torch
+from . import _flash_attn2_xpu_042c80b
+ops = torch.ops._flash_attn2_xpu_042c80b
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flash_attn2_xpu_042c80b::{op_name}"

build/torch211-cxx11-xpu20253-x86_64-linux/flash_attn_interface.py CHANGED Viewed

@@ -36,7 +36,14 @@ def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     assert head_dim <= 256
     if device.type == "xpu":
-        return 64
     # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)

     assert head_dim <= 256
     if device.type == "xpu":
+        if head_dim <= 96:
+            return 64
+        elif head_dim <= 128:
+            return 32
+        elif head_dim <= 256:
+            return 64
+        else:
+            return 32
     # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)

build/torch211-cxx11-xpu20253-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flash-attn2",
-  "id": "_flash_attn2_xpu_85c21a0",
   "version": 1,
   "license": "BSD-3-Clause",
   "python-depends": [],

 {
   "name": "flash-attn2",
+  "id": "_flash_attn2_xpu_042c80b",
   "version": 1,
   "license": "BSD-3-Clause",
   "python-depends": [],

build/torch212-cxx11-xpu20253-x86_64-linux/{_flash_attn2_xpu_85c21a0.abi3.so → _flash_attn2_xpu_042c80b.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d32013ee9eba7775b2ce44c94d6059798f6c56cf7e9b32b5c7f7684aaedd99ff
-size 17672144

 version https://git-lfs.github.com/spec/v1
+oid sha256:6cf9922ba471f172c427768886103c757505c66315fda421d3e490d2bf65fccb
+size 37198312

build/torch212-cxx11-xpu20253-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_xpu_85c21a0
-ops = torch.ops._flash_attn2_xpu_85c21a0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_xpu_85c21a0::{op_name}"

 import torch
+from . import _flash_attn2_xpu_042c80b
+ops = torch.ops._flash_attn2_xpu_042c80b
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flash_attn2_xpu_042c80b::{op_name}"

build/torch212-cxx11-xpu20253-x86_64-linux/flash_attn_interface.py CHANGED Viewed

@@ -36,7 +36,14 @@ def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     assert head_dim <= 256
     if device.type == "xpu":
-        return 64
     # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)

     assert head_dim <= 256
     if device.type == "xpu":
+        if head_dim <= 96:
+            return 64
+        elif head_dim <= 128:
+            return 32
+        elif head_dim <= 256:
+            return 64
+        else:
+            return 32
     # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)

build/torch212-cxx11-xpu20253-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flash-attn2",
-  "id": "_flash_attn2_xpu_85c21a0",
   "version": 1,
   "license": "BSD-3-Clause",
   "python-depends": [],

 {
   "name": "flash-attn2",
+  "id": "_flash_attn2_xpu_042c80b",
   "version": 1,
   "license": "BSD-3-Clause",
   "python-depends": [],