[general]
version = "0.0.1"
|
|
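# Torch extension for the paged_attention op: the C++/header binding sources
# and the Python package root (torch-ext).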
[torch]
name = "paged_attention"
src = [
  "torch-ext/registration.h",
  "torch-ext/torch_binding.cpp",
  "torch-ext/torch_binding.h"
]
pyroot = "torch-ext"
|
|
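# Standalone CUDA utility kernels, built for compute capabilities 7.0 through
# 9.0 (Volta through Hopper), with no dependencies on other targets.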
[kernel.cuda_utils]
capabilities = [ "7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0" ]
src = [
  "cuda-utils/cuda_utils_kernels.cu",
]
depends = []
|
|
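# The paged attention kernels themselves: v1/v2 attention kernels, KV-cache
# kernels, and fp8 quantization utilities (AMD and NVIDIA variants), built for
# the same compute capabilities and depending on the torch target above.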
[kernel.paged_attention]
capabilities = [ "7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0" ]
src = [
  "paged-attention/attention/attention_dtypes.h",
  "paged-attention/attention/attention_generic.cuh",
  "paged-attention/attention/attention_kernels.cuh",
  "paged-attention/attention/attention_utils.cuh",
  "paged-attention/attention/dtype_bfloat16.cuh",
  "paged-attention/attention/dtype_float16.cuh",
  "paged-attention/attention/dtype_float32.cuh",
  "paged-attention/attention/dtype_fp8.cuh",
  "paged-attention/attention/paged_attention_v1.cu",
  "paged-attention/attention/paged_attention_v2.cu",
  "paged-attention/cache_kernels.cu",
  "paged-attention/cuda_compat.h",
  "paged-attention/dispatch_utils.h",
  "paged-attention/quantization/fp8/amd/hip_float8.h",
  "paged-attention/quantization/fp8/amd/hip_float8_impl.h",
  "paged-attention/quantization/fp8/amd/quant_utils.cuh",
  "paged-attention/quantization/fp8/nvidia/quant_utils.cuh",
]
include = [ "." ]
depends = [ "torch" ]
|