# Build configuration: one Torch extension binding plus one CUDA kernel
# section containing the layer-norm sources.

[general]
# NOTE(review): the name says "vllm_flash_attn3", but the only kernel section
# visible in this file is layer-norm. Confirm this is intentional and not a
# leftover from a copied config before renaming anything downstream.
name = "vllm_flash_attn3"
universal = false

[torch]
src = [
    "torch-ext/torch_binding.cpp",
    "torch-ext/torch_binding.h",
]

[kernel.layer-norm]
depends = ["torch"]
backend = "cuda"
include = ["."]
# One backward/forward translation unit per numeric suffix, in plain and
# "parallel" variants, plus the shared headers they are listed alongside.
src = [
    "layer-norm/ln.h",
    "layer-norm/ln_api.cpp",
    "layer-norm/ln_bwd_1024.cu",
    "layer-norm/ln_bwd_1280.cu",
    "layer-norm/ln_bwd_1536.cu",
    "layer-norm/ln_bwd_2048.cu",
    "layer-norm/ln_bwd_256.cu",
    "layer-norm/ln_bwd_2560.cu",
    "layer-norm/ln_bwd_3072.cu",
    "layer-norm/ln_bwd_4096.cu",
    "layer-norm/ln_bwd_512.cu",
    "layer-norm/ln_bwd_5120.cu",
    "layer-norm/ln_bwd_6144.cu",
    "layer-norm/ln_bwd_7168.cu",
    "layer-norm/ln_bwd_768.cu",
    "layer-norm/ln_bwd_8192.cu",
    "layer-norm/ln_bwd_kernels.cuh",
    "layer-norm/ln_fwd_1024.cu",
    "layer-norm/ln_fwd_1280.cu",
    "layer-norm/ln_fwd_1536.cu",
    "layer-norm/ln_fwd_2048.cu",
    "layer-norm/ln_fwd_256.cu",
    "layer-norm/ln_fwd_2560.cu",
    "layer-norm/ln_fwd_3072.cu",
    "layer-norm/ln_fwd_4096.cu",
    "layer-norm/ln_fwd_512.cu",
    "layer-norm/ln_fwd_5120.cu",
    "layer-norm/ln_fwd_6144.cu",
    "layer-norm/ln_fwd_7168.cu",
    "layer-norm/ln_fwd_768.cu",
    "layer-norm/ln_fwd_8192.cu",
    "layer-norm/ln_fwd_kernels.cuh",
    "layer-norm/ln_kernel_traits.h",
    "layer-norm/ln_parallel_bwd_1024.cu",
    "layer-norm/ln_parallel_bwd_1280.cu",
    "layer-norm/ln_parallel_bwd_1536.cu",
    "layer-norm/ln_parallel_bwd_2048.cu",
    "layer-norm/ln_parallel_bwd_256.cu",
    "layer-norm/ln_parallel_bwd_2560.cu",
    "layer-norm/ln_parallel_bwd_3072.cu",
    "layer-norm/ln_parallel_bwd_4096.cu",
    "layer-norm/ln_parallel_bwd_512.cu",
    "layer-norm/ln_parallel_bwd_5120.cu",
    "layer-norm/ln_parallel_bwd_6144.cu",
    "layer-norm/ln_parallel_bwd_7168.cu",
    "layer-norm/ln_parallel_bwd_768.cu",
    "layer-norm/ln_parallel_bwd_8192.cu",
    "layer-norm/ln_parallel_fwd_1024.cu",
    "layer-norm/ln_parallel_fwd_1280.cu",
    "layer-norm/ln_parallel_fwd_1536.cu",
    "layer-norm/ln_parallel_fwd_2048.cu",
    "layer-norm/ln_parallel_fwd_256.cu",
    "layer-norm/ln_parallel_fwd_2560.cu",
    "layer-norm/ln_parallel_fwd_3072.cu",
    "layer-norm/ln_parallel_fwd_4096.cu",
    "layer-norm/ln_parallel_fwd_512.cu",
    "layer-norm/ln_parallel_fwd_5120.cu",
    "layer-norm/ln_parallel_fwd_6144.cu",
    "layer-norm/ln_parallel_fwd_7168.cu",
    "layer-norm/ln_parallel_fwd_768.cu",
    "layer-norm/ln_parallel_fwd_8192.cu",
    "layer-norm/ln_parallel_residual_bwd_kernels.cuh",
    "layer-norm/ln_parallel_residual_fwd_kernels.cuh",
    "layer-norm/ln_utils.cuh",
    "layer-norm/static_switch.h",
]
# The -U flags undefine the __CUDA_NO_*__ macros for half / bfloat16 /
# bfloat162 operators and conversions.
# NOTE(review): presumably these macros are predefined by the surrounding
# build and must be cleared for the kernels to compile - confirm.
cuda-flags = [
    "-O3",
    "-U__CUDA_NO_HALF_OPERATORS__",
    "-U__CUDA_NO_HALF_CONVERSIONS__",
    "-U__CUDA_NO_BFLOAT16_OPERATORS__",
    "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
    "-U__CUDA_NO_BFLOAT162_OPERATORS__",
    "-U__CUDA_NO_BFLOAT162_CONVERSIONS__",
    "--expt-relaxed-constexpr",
    "--expt-extended-lambda",
    "--use_fast_math",
]