medmekk commited on
Commit
44b112f
·
1 Parent(s): 7017992
CMakeLists.txt CHANGED
@@ -277,6 +277,7 @@ set(_qattn_SRC
277
  "sage_attention/reduction_utils.cuh"
278
  "sage_attention/wgmma.cuh"
279
  "sage_attention/utils.cuh"
 
280
  )
281
 
282
 
 
277
  "sage_attention/reduction_utils.cuh"
278
  "sage_attention/wgmma.cuh"
279
  "sage_attention/utils.cuh"
280
+ "sage_attention/cuda_tensormap_shim.cuh"
281
  )
282
 
283
 
sage_attention/cuda_tensormap_shim.cuh ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Lightweight compatibility shim for CUDA tensor map APIs.
3
+ * Provides fallbacks for CUtensorMap and related enums when compiling
4
+ * against CUDA toolkits that don't expose these symbols in headers.
5
+ */
6
+
7
+ #pragma once
8
+
9
+ #include <cuda.h>
10
+
11
+ // Guard on CUDA version and symbol presence. Some environments have
12
+ // runtime symbols but not headers; we define minimal stand-ins.
13
+
14
+ #ifndef CU_TENSOR_MAP_L2_PROMOTION_NONE
15
+ typedef enum CUtensorMapL2promotion_enum {
16
+ CU_TENSOR_MAP_L2_PROMOTION_NONE = 0,
17
+ CU_TENSOR_MAP_L2_PROMOTION_L2_64B = 1,
18
+ CU_TENSOR_MAP_L2_PROMOTION_L2_128B = 2
19
+ } CUtensorMapL2promotion_enum;
20
+ #endif
21
+
22
+ #ifndef CUtensorMap
23
+ typedef struct CUtensorMap_st {
24
+ unsigned long long data[16];
25
+ } CUtensorMap;
26
+ #endif
27
+
28
+ #ifndef CU_TENSOR_MAP_DATA_TYPE_UINT8
29
+ typedef enum CUtensorMapDataType {
30
+ CU_TENSOR_MAP_DATA_TYPE_UINT8 = 1,
31
+ CU_TENSOR_MAP_DATA_TYPE_INT8 = 2,
32
+ CU_TENSOR_MAP_DATA_TYPE_FLOAT16 = 10,
33
+ CU_TENSOR_MAP_DATA_TYPE_BFLOAT16 = 13
34
+ } CUtensorMapDataType;
35
+ #endif
36
+
37
+ #ifndef CU_TENSOR_MAP_INTERLEAVE_NONE
38
+ typedef enum CUtensorMapInterleave_enum {
39
+ CU_TENSOR_MAP_INTERLEAVE_NONE = 0
40
+ } CUtensorMapInterleave_enum;
41
+ #endif
42
+
43
+ #ifndef CU_TENSOR_MAP_SWIZZLE_32B
44
+ typedef enum CUtensorMapSwizzle_enum {
45
+ CU_TENSOR_MAP_SWIZZLE_NONE = 0,
46
+ CU_TENSOR_MAP_SWIZZLE_32B = 1,
47
+ CU_TENSOR_MAP_SWIZZLE_64B = 2,
48
+ CU_TENSOR_MAP_SWIZZLE_128B = 3
49
+ } CUtensorMapSwizzle_enum;
50
+ #endif
51
+
52
+ #ifndef CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE
53
+ typedef enum CUtensorMapFloatOOBfill_enum {
54
+ CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0
55
+ } CUtensorMapFloatOOBfill_enum;
56
+ #endif
57
+
58
+ // We intentionally do not declare cuTensorMapEncodeTiled here; the code
59
+ // dynamically resolves it from libcuda at runtime when available.
60
+
61
+
sage_attention/qattn/qk_int_sv_f8_cuda_sm90.cu CHANGED
@@ -24,6 +24,7 @@
24
  #include "../wgmma.cuh"
25
  #include "../math.cuh"
26
  #include "../dispatch_utils.h"
 
27
 
28
  #include "attn_utils.cuh"
29
 
 
24
  #include "../wgmma.cuh"
25
  #include "../math.cuh"
26
  #include "../dispatch_utils.h"
27
+ #include "../cuda_tensormap_shim.cuh"
28
 
29
  #include "attn_utils.cuh"
30