bababababooey committed on
Commit
e76fe3a
·
verified ·
1 Parent(s): f62c8dc

Update build.toml

Browse files
Files changed (1) hide show
  1. build.toml +5 -9
build.toml CHANGED
@@ -1,8 +1,8 @@
1
  [general]
2
  name = "vllm_flash_attn3"
3
  universal = false
4
- cuda-minver = "12.4"
5
- cuda-maxver = "12.4"
6
 
7
  [torch]
8
  src = [
@@ -18,8 +18,6 @@ cuda-flags = [
18
  "-O3",
19
  "-std=c++17",
20
  "--ftemplate-backtrace-limit=0", # To debug template code
21
- "--use_fast_math",
22
- "-DCUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1"
23
  "--expt-relaxed-constexpr",
24
  "--expt-extended-lambda",
25
  "--use_fast_math",
@@ -44,17 +42,15 @@ depends = ["torch", "cutlass_3_9"]
44
 
45
  [kernel.flash_attn_sm80]
46
  backend = "cuda"
47
- +cuda-capabilities = ["8.0", "8.6", "9.0a"]
48
  cuda-flags = [
49
  "-O3",
50
  "-std=c++17",
51
  "--ftemplate-backtrace-limit=0", # To debug template code
52
- "--use_fast_math",
53
- "-DCUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1",
54
  "--expt-relaxed-constexpr",
55
  "--expt-extended-lambda",
56
- "--use_fast_math",
57
  "-DNDEBUG",
 
58
  ]
59
  src = [
60
  "flash-attn/block.h",
@@ -191,7 +187,7 @@ depends = ["torch", "cutlass_3_9"]
191
 
192
  [kernel.flash_attn_sm90]
193
  backend = "cuda"
194
- cuda-capabilities = ["8.0", "9.0a"]
195
  cuda-flags = [
196
  "-O3",
197
  "-std=c++17",
 
1
  [general]
2
  name = "vllm_flash_attn3"
3
  universal = false
4
+ cuda-minver = "12.0"
5
+ cuda-maxver = "12.8"
6
 
7
  [torch]
8
  src = [
 
18
  "-O3",
19
  "-std=c++17",
20
  "--ftemplate-backtrace-limit=0", # To debug template code
 
 
21
  "--expt-relaxed-constexpr",
22
  "--expt-extended-lambda",
23
  "--use_fast_math",
 
42
 
43
  [kernel.flash_attn_sm80]
44
  backend = "cuda"
45
+ +cuda-capabilities = ["8.0", "8.6"]
46
  cuda-flags = [
47
  "-O3",
48
  "-std=c++17",
49
  "--ftemplate-backtrace-limit=0", # To debug template code
 
 
50
  "--expt-relaxed-constexpr",
51
  "--expt-extended-lambda",
 
52
  "-DNDEBUG",
53
+ "-DFLASHATTENTION_DISABLE_FP8"
54
  ]
55
  src = [
56
  "flash-attn/block.h",
 
187
 
188
  [kernel.flash_attn_sm90]
189
  backend = "cuda"
190
+ cuda-capabilities = ["9.0a"]
191
  cuda-flags = [
192
  "-O3",
193
  "-std=c++17",