# Build configuration for the "quantization" kernel extension.
[general]
name = "quantization"
[torch]
src = [
"core/registration.h",
"core/scalar_type.hpp",
"torch-ext/torch_binding.cpp",
"torch-ext/torch_binding.h"
]
include = [ "." ]
[kernel.cutlass_w8a8]
cuda-capabilities = [ "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "9.0a" ]
src = [
"core/math.hpp",
"cutlass_w8a8/common.hpp",
"cutlass_w8a8/scaled_mm_c2x.cu",
"cutlass_w8a8/scaled_mm_c2x.cuh",
"cutlass_w8a8/scaled_mm_c2x_sm75_dispatch.cuh",
"cutlass_w8a8/scaled_mm_c2x_sm80_dispatch.cuh",
"cutlass_w8a8/scaled_mm_c2x_sm89_fp8_dispatch.cuh",
"cutlass_w8a8/scaled_mm_c2x_sm89_int8_dispatch.cuh",
"cutlass_w8a8/scaled_mm_entry.cu",
"cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp",
"cutlass_extensions/epilogue/broadcast_load_epilogue_c2x.hpp",
]
include = [ "." ]
depends = [ "cutlass_3_6", "torch" ]
[kernel.cutlass_w8a8_hopper]
cuda-capabilities = [ "9.0", "9.0a" ]
src = [
"core/math.hpp",
"cutlass_w8a8/common.hpp",
"cutlass_w8a8/scaled_mm_c3x.cu",
"cutlass_w8a8/scaled_mm_c3x.cuh",
"cutlass_w8a8/scaled_mm_c3x_sm90_fp8_dispatch.cuh",
"cutlass_w8a8/scaled_mm_c3x_sm90_int8_dispatch.cuh",
"cutlass_extensions/common.cpp",
"cutlass_extensions/common.hpp",
"cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp",
"cutlass_extensions/epilogue/broadcast_load_epilogue_c3x.hpp",
]
include = [ "." ]
depends = [ "cutlass_3_6", "torch" ]
[kernel.fp8_common]
language = "cuda-hipify"
cuda-capabilities = [ "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "9.0a" ]
rocm-archs = [ "gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx1030", "gfx1100", "gfx1101" ]
src = [
"fp8/amd/hip_float8.h",
"fp8/amd/hip_float8_impl.h",
"fp8/common.cu",
"fp8/common.cuh",
"dispatch_utils.h",
"vectorization.cuh"
]
include = [ "." ]
depends = [ "torch" ]
[kernel.fp8_marlin]
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "9.0a" ]
src = [
"fp8/fp8_marlin.cu",
"gptq_marlin/marlin.cuh",
"gptq_marlin/marlin_dtypes.cuh",
]
depends = [ "torch" ]
[kernel.int8_common]
language = "cuda-hipify"
cuda-capabilities = [ "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "9.0a" ]
rocm-archs = [ "gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx1030", "gfx1100", "gfx1101" ]
src = [
"compressed_tensors/int8_quant_kernels.cu",
"dispatch_utils.h"
]
include = [ "." ]
depends = [ "torch" ]
[kernel.gptq_marlin]
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "9.0a" ]
src = [
"core/scalar_type.hpp",
"gptq_marlin/awq_marlin_repack.cu",
"gptq_marlin/gptq_marlin.cu",
"gptq_marlin/gptq_marlin_repack.cu",
"gptq_marlin/marlin.cuh",
"gptq_marlin/marlin_dtypes.cuh"
]
include = [ "." ]
depends = [ "torch" ]
[kernel.marlin]
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "9.0a" ]
src = [
"core/scalar_type.hpp",
"marlin/dense/common/base.h",
"marlin/dense/common/mem.h",
"marlin/dense/marlin_cuda_kernel.cu",
"marlin/qqq/marlin_qqq_gemm_kernel.cu",
"marlin/sparse/common/base.h",
"marlin/sparse/common/mem.h",
"marlin/sparse/common/mma.h",
"marlin/sparse/marlin_24_cuda_kernel.cu"
]
include = [ "." ]
depends = [ "torch" ]