# Build manifest for the "flash_attn" kernel package.
# Format: TOML. Tables below: [general], [torch], [kernel.flash_attn].

[general]
name = "flash_attn"
universal = false

# Sources listed for the Torch side of the package.
[torch]
src = ["torch-ext/torch_binding.cpp", "torch-ext/torch_binding.h"]

# Kernel named "flash_attn", built with the CUDA backend.
[kernel.flash_attn]
backend = "cuda"
# NOTE(review): every .cu file listed under `src` carries an "sm80" suffix,
# while the capabilities below also include 9.0, 10.0 and 12.0 — confirm the
# same sources are meant to be compiled for all of them.
cuda-capabilities = [
  "8.0",
  "9.0",
  "10.0",
  "12.0",
]
src = [
  "flash_attn/flash_api.cpp",

  # Headers not tied to a specific fwd/bwd variant.
  "flash_attn/src/philox_unpack.cuh",
  "flash_attn/src/namespace_config.h",
  "flash_attn/src/hardware_info.h",
  "flash_attn/src/flash.h",
  "flash_attn/src/static_switch.h",
  "flash_attn/src/alibi.h",
  "flash_attn/src/block_info.h",
  "flash_attn/src/dropout.h",
  "flash_attn/src/kernel_traits.h",
  "flash_attn/src/mask.h",
  "flash_attn/src/philox.cuh",
  "flash_attn/src/rotary.h",
  "flash_attn/src/softmax.h",
  "flash_attn/src/utils.h",

  # flash_bwd_*: one .cu per hdim {32,64,96,128,192,256} x {bf16,fp16}
  # x {causal, non-causal}, followed by the bwd headers.
  "flash_attn/src/flash_bwd_hdim128_bf16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim128_bf16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim128_fp16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim128_fp16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim192_bf16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim192_bf16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim192_fp16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim192_fp16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim256_bf16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim256_bf16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim256_fp16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim256_fp16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim32_bf16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim32_bf16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim32_fp16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim32_fp16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim64_bf16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim64_bf16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim64_fp16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim64_fp16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim96_bf16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim96_bf16_sm80.cu",
  "flash_attn/src/flash_bwd_hdim96_fp16_causal_sm80.cu",
  "flash_attn/src/flash_bwd_hdim96_fp16_sm80.cu",
  "flash_attn/src/flash_bwd_kernel.h",
  "flash_attn/src/flash_bwd_launch_template.h",
  "flash_attn/src/flash_bwd_preprocess_kernel.h",

  # flash_fwd_*: same hdim x dtype x causal grid, followed by the fwd headers.
  "flash_attn/src/flash_fwd_hdim128_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim128_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim128_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim128_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim192_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim192_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim192_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim192_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim256_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim256_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim256_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim256_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim32_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim32_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim32_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim32_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim64_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim64_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim64_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim64_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim96_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim96_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_hdim96_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_hdim96_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_kernel.h",
  "flash_attn/src/flash_fwd_launch_template.h",

  # flash_fwd_split_*: same hdim x dtype x causal grid.
  "flash_attn/src/flash_fwd_split_hdim128_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim128_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim128_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim128_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim192_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim192_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim192_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim192_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim256_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim256_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim256_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim256_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim32_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim32_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim32_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim32_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim64_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim64_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim64_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim64_fp16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim96_bf16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim96_bf16_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim96_fp16_causal_sm80.cu",
  "flash_attn/src/flash_fwd_split_hdim96_fp16_sm80.cu",
]
depends = ["torch", "cutlass_3_6"]