add_ship

Browse files

Files changed (3) hide show

CMakeLists.txt +1 -0
sage_attention/cuda_tensormap_shim.cuh +61 -0
sage_attention/qattn/qk_int_sv_f8_cuda_sm90.cu +1 -0

CMakeLists.txt CHANGED Viewed

@@ -277,6 +277,7 @@ set(_qattn_SRC
 "sage_attention/reduction_utils.cuh"
 "sage_attention/wgmma.cuh"
 "sage_attention/utils.cuh"
 )

 "sage_attention/reduction_utils.cuh"
 "sage_attention/wgmma.cuh"
 "sage_attention/utils.cuh"
+"sage_attention/cuda_tensormap_shim.cuh"
 )

sage_attention/cuda_tensormap_shim.cuh ADDED Viewed

	@@ -0,0 +1,61 @@

+/*
+ * Lightweight compatibility shim for CUDA tensor map APIs.
+ * Provides fallbacks for CUtensorMap and related enums when compiling
+ * against CUDA toolkits that don't expose these symbols in headers.
+ */
+#pragma once
+#include <cuda.h>
+// The tensor-map types and enumerators are ordinary C declarations, not
+// preprocessor macros, so `#ifndef <enumerator>` can never detect them (the
+// test is always true). Gate the stand-ins on CUDA_VERSION, which <cuda.h>
+// defines as a macro: toolkits from 12.0 on declare everything below
+// themselves, and declaring it a second time would fail to compile.
+#if !defined(CUDA_VERSION) || CUDA_VERSION < 12000
+// Each enum keeps its `_enum` tag (still usable as a type name in C++) and
+// additionally gets the typedef spelling used by the official header.
+// Numeric values mirror the driver's enums, because the encoder that
+// consumes them is resolved from libcuda at runtime.
+typedef enum CUtensorMapL2promotion_enum {
+    CU_TENSOR_MAP_L2_PROMOTION_NONE = 0,
+    CU_TENSOR_MAP_L2_PROMOTION_L2_64B = 1,
+    CU_TENSOR_MAP_L2_PROMOTION_L2_128B = 2
+} CUtensorMapL2promotion;
+// Opaque 128-byte descriptor. The driver's definition is 64-byte aligned,
+// so the stand-in carries the same alignment.
+typedef struct CUtensorMap_st {
+    alignas(64) unsigned long long data[16];
+} CUtensorMap;
+// Values follow the driver's CUtensorMapDataType ordering
+// (UINT8 = 0, ..., FLOAT16 = 6, ..., BFLOAT16 = 9).
+typedef enum CUtensorMapDataType_enum {
+    CU_TENSOR_MAP_DATA_TYPE_UINT8 = 0,
+    // NOTE(review): the driver enum has no INT8 member; kept for source
+    // compatibility and aliased to the 1-byte element type -- confirm that
+    // no caller relies on a distinct value.
+    CU_TENSOR_MAP_DATA_TYPE_INT8 = CU_TENSOR_MAP_DATA_TYPE_UINT8,
+    CU_TENSOR_MAP_DATA_TYPE_FLOAT16 = 6,
+    CU_TENSOR_MAP_DATA_TYPE_BFLOAT16 = 9
+} CUtensorMapDataType;
+typedef enum CUtensorMapInterleave_enum {
+    CU_TENSOR_MAP_INTERLEAVE_NONE = 0
+} CUtensorMapInterleave;
+typedef enum CUtensorMapSwizzle_enum {
+    CU_TENSOR_MAP_SWIZZLE_NONE = 0,
+    CU_TENSOR_MAP_SWIZZLE_32B = 1,
+    CU_TENSOR_MAP_SWIZZLE_64B = 2,
+    CU_TENSOR_MAP_SWIZZLE_128B = 3
+} CUtensorMapSwizzle;
+typedef enum CUtensorMapFloatOOBfill_enum {
+    CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0
+} CUtensorMapFloatOOBfill;
+#endif  // !defined(CUDA_VERSION) || CUDA_VERSION < 12000
+// We intentionally do not declare cuTensorMapEncodeTiled here; the code
+// dynamically resolves it from libcuda at runtime when available.

sage_attention/qattn/qk_int_sv_f8_cuda_sm90.cu CHANGED Viewed

@@ -24,6 +24,7 @@
 #include "../wgmma.cuh"
 #include "../math.cuh"
 #include "../dispatch_utils.h"
 #include "attn_utils.cuh"

 #include "../wgmma.cuh"
 #include "../math.cuh"
 #include "../dispatch_utils.h"
+#include "../cuda_tensormap_shim.cuh"
 #include "attn_utils.cuh"