bababababooey
/

vllm-flash-attn3

Model card Files Files and versions

bababababooey committed on Aug 15, 2025

Commit

e76fe3a

·

verified ·

1 Parent(s): f62c8dc

Update build.toml

Files changed (1) hide show

build.toml +5 -9

build.toml CHANGED Viewed

@@ -1,8 +1,8 @@
 [general]
 name = "vllm_flash_attn3"
 universal = false
-cuda-minver = "12.4"
-cuda-maxver = "12.4"
 [torch]
 src = [
@@ -18,8 +18,6 @@ cuda-flags = [
   "-O3",
   "-std=c++17",
   "--ftemplate-backtrace-limit=0",              # To debug template code
-  "--use_fast_math",
-  "-DCUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1"
   "--expt-relaxed-constexpr",
   "--expt-extended-lambda",
   "--use_fast_math",
@@ -44,17 +42,15 @@ depends = ["torch", "cutlass_3_9"]
 [kernel.flash_attn_sm80]
 backend = "cuda"
-+cuda-capabilities = ["8.0", "8.6", "9.0a"]
 cuda-flags = [
   "-O3",
   "-std=c++17",
   "--ftemplate-backtrace-limit=0",              # To debug template code
-  "--use_fast_math",
-  "-DCUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1",
   "--expt-relaxed-constexpr",
   "--expt-extended-lambda",
-  "--use_fast_math",
   "-DNDEBUG",
 ]
 src = [
   "flash-attn/block.h",
@@ -191,7 +187,7 @@ depends = ["torch", "cutlass_3_9"]
 [kernel.flash_attn_sm90]
 backend = "cuda"
-cuda-capabilities = ["8.0", "9.0a"]
 cuda-flags = [
   "-O3",
   "-std=c++17",

 [general]
 name = "vllm_flash_attn3"
 universal = false
+cuda-minver = "12.0"
+cuda-maxver = "12.8"
 [torch]
 src = [
   "-O3",
   "-std=c++17",
   "--ftemplate-backtrace-limit=0",              # To debug template code
   "--expt-relaxed-constexpr",
   "--expt-extended-lambda",
   "--use_fast_math",
 [kernel.flash_attn_sm80]
 backend = "cuda"
++cuda-capabilities = ["8.0", "8.6"]
 cuda-flags = [
   "-O3",
   "-std=c++17",
   "--ftemplate-backtrace-limit=0",              # To debug template code
   "--expt-relaxed-constexpr",
   "--expt-extended-lambda",
   "-DNDEBUG",
+  "-DFLASHATTENTION_DISABLE_FP8"
 ]
 src = [
   "flash-attn/block.h",
 [kernel.flash_attn_sm90]
 backend = "cuda"
+cuda-capabilities = ["9.0a"]
 cuda-flags = [
   "-O3",
   "-std=c++17",