medmekk
commited on
Commit
·
44b112f
1
Parent(s):
7017992
add_ship
Browse files
CMakeLists.txt
CHANGED
|
@@ -277,6 +277,7 @@ set(_qattn_SRC
|
|
| 277 |
"sage_attention/reduction_utils.cuh"
|
| 278 |
"sage_attention/wgmma.cuh"
|
| 279 |
"sage_attention/utils.cuh"
|
|
|
|
| 280 |
)
|
| 281 |
|
| 282 |
|
|
|
|
| 277 |
"sage_attention/reduction_utils.cuh"
|
| 278 |
"sage_attention/wgmma.cuh"
|
| 279 |
"sage_attention/utils.cuh"
|
| 280 |
+
"sage_attention/cuda_tensormap_shim.cuh"
|
| 281 |
)
|
| 282 |
|
| 283 |
|
sage_attention/cuda_tensormap_shim.cuh
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Lightweight compatibility shim for CUDA tensor map APIs.
|
| 3 |
+
* Provides fallbacks for CUtensorMap and related enums when compiling
|
| 4 |
+
* against CUDA toolkits that don't expose these symbols in headers.
|
| 5 |
+
*/
|
| 6 |
+
|
| 7 |
+
#pragma once
|
| 8 |
+
|
| 9 |
+
#include <cuda.h>
|
| 10 |
+
|
| 11 |
+
// Guard on CUDA version and symbol presence. Some environments have
|
| 12 |
+
// runtime symbols but not headers; we define minimal stand-ins.
|
| 13 |
+
|
| 14 |
+
#ifndef CU_TENSOR_MAP_L2_PROMOTION_NONE
|
| 15 |
+
typedef enum CUtensorMapL2promotion_enum {
|
| 16 |
+
CU_TENSOR_MAP_L2_PROMOTION_NONE = 0,
|
| 17 |
+
CU_TENSOR_MAP_L2_PROMOTION_L2_64B = 1,
|
| 18 |
+
CU_TENSOR_MAP_L2_PROMOTION_L2_128B = 2
|
| 19 |
+
} CUtensorMapL2promotion_enum;
|
| 20 |
+
#endif
|
| 21 |
+
|
| 22 |
+
#ifndef CUtensorMap
|
| 23 |
+
typedef struct CUtensorMap_st {
|
| 24 |
+
unsigned long long data[16];
|
| 25 |
+
} CUtensorMap;
|
| 26 |
+
#endif
|
| 27 |
+
|
| 28 |
+
#ifndef CU_TENSOR_MAP_DATA_TYPE_UINT8
|
| 29 |
+
typedef enum CUtensorMapDataType {
|
| 30 |
+
CU_TENSOR_MAP_DATA_TYPE_UINT8 = 1,
|
| 31 |
+
CU_TENSOR_MAP_DATA_TYPE_INT8 = 2,
|
| 32 |
+
CU_TENSOR_MAP_DATA_TYPE_FLOAT16 = 10,
|
| 33 |
+
CU_TENSOR_MAP_DATA_TYPE_BFLOAT16 = 13
|
| 34 |
+
} CUtensorMapDataType;
|
| 35 |
+
#endif
|
| 36 |
+
|
| 37 |
+
#ifndef CU_TENSOR_MAP_INTERLEAVE_NONE
|
| 38 |
+
typedef enum CUtensorMapInterleave_enum {
|
| 39 |
+
CU_TENSOR_MAP_INTERLEAVE_NONE = 0
|
| 40 |
+
} CUtensorMapInterleave_enum;
|
| 41 |
+
#endif
|
| 42 |
+
|
| 43 |
+
#ifndef CU_TENSOR_MAP_SWIZZLE_32B
|
| 44 |
+
typedef enum CUtensorMapSwizzle_enum {
|
| 45 |
+
CU_TENSOR_MAP_SWIZZLE_NONE = 0,
|
| 46 |
+
CU_TENSOR_MAP_SWIZZLE_32B = 1,
|
| 47 |
+
CU_TENSOR_MAP_SWIZZLE_64B = 2,
|
| 48 |
+
CU_TENSOR_MAP_SWIZZLE_128B = 3
|
| 49 |
+
} CUtensorMapSwizzle_enum;
|
| 50 |
+
#endif
|
| 51 |
+
|
| 52 |
+
#ifndef CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE
|
| 53 |
+
typedef enum CUtensorMapFloatOOBfill_enum {
|
| 54 |
+
CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0
|
| 55 |
+
} CUtensorMapFloatOOBfill_enum;
|
| 56 |
+
#endif
|
| 57 |
+
|
| 58 |
+
// We intentionally do not declare cuTensorMapEncodeTiled here; the code
|
| 59 |
+
// dynamically resolves it from libcuda at runtime when available.
|
| 60 |
+
|
| 61 |
+
|
sage_attention/qattn/qk_int_sv_f8_cuda_sm90.cu
CHANGED
|
@@ -24,6 +24,7 @@
|
|
| 24 |
#include "../wgmma.cuh"
|
| 25 |
#include "../math.cuh"
|
| 26 |
#include "../dispatch_utils.h"
|
|
|
|
| 27 |
|
| 28 |
#include "attn_utils.cuh"
|
| 29 |
|
|
|
|
| 24 |
#include "../wgmma.cuh"
|
| 25 |
#include "../math.cuh"
|
| 26 |
#include "../dispatch_utils.h"
|
| 27 |
+
#include "../cuda_tensormap_shim.cuh"
|
| 28 |
|
| 29 |
#include "attn_utils.cuh"
|
| 30 |
|