File size: 1,116 Bytes
e05eed1
98a67a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
// SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

#pragma once

// Fallback definitions used when the IntelliSense parser is reading the file
// or when the compiler is not NVCC. Unless KERNEL_ARG2 has already been
// defined, the KERNEL_ARG* launch-configuration macros expand to nothing and
// the CUDA qualifiers __global__/__device__/__host__ are defined as empty
// tokens (presumably so that annotated declarations parse as plain C++).
// NOTE: the qualifier stubs live inside the same KERNEL_ARG2 guard, so they are
// skipped as well whenever KERNEL_ARG2 was defined before this point.
#if defined(__INTELLISENSE__) || !defined(__NVCC__)
#ifndef KERNEL_ARG2
#define KERNEL_ARG2(grid, block)
#define KERNEL_ARG3(grid, block, sh_mem)
#define KERNEL_ARG4(grid, block, sh_mem, stream)
#define __global__
#define __device__
#define __host__
#endif
#endif

// IntelliSense-only shims: define __CUDACC__, include the CUDA runtime header,
// and declare the names that kernels use so the editor's parser can resolve
// them.
#ifdef __INTELLISENSE__
#define __CUDACC__
#include <cuda_runtime.h>

void __syncthreads();  // workaround __syncthreads warning

// Stand-in objects for the index/dimension variables referenced in kernels.
dim3 threadIdx;
dim3 blockIdx;
dim3 blockDim;
dim3 gridDim;

#else
// Not IntelliSense: if KERNEL_ARG2 is still undefined at this point (i.e. the
// empty fallbacks earlier in this header were not installed and nothing else
// defined it), map the KERNEL_ARG* macros to the <<< ... >>> launch syntax
// with 2, 3 or 4 launch-configuration arguments.
#ifndef KERNEL_ARG2
#define KERNEL_ARG2(grid, block) <<< grid, block >>>
#define KERNEL_ARG3(grid, block, sh_mem) <<< grid, block, sh_mem >>>
#define KERNEL_ARG4(grid, block, sh_mem, stream) <<< grid, block, sh_mem, stream >>>
#endif
#endif

// Shorthand that expands to both the __host__ and __device__ qualifiers.
#define __any_device__ __host__ __device__

// Inlining qualifier for helpers: expands to __forceinline__ when compiling
// with NVCC and to the standard `inline` keyword with any other compiler.
#ifdef __NVCC__
#define __lib_inline__ __forceinline__

#else
#define __lib_inline__ inline
#endif

// Rounding-up division helper, callable from host and device code and usable
// in constant expressions (e.g. array extents or static_assert).
//
// Evaluates `(n + d - 1) / d` in the type produced by the usual arithmetic
// conversions of the operands and returns that value. For integer arguments
// with n >= 0 and d > 0 the result equals ceil(n / d), e.g. how many chunks of
// size d are needed to cover n items.
//
// Preconditions (not checked):
//   - d must be non-zero for integer types;
//   - `n + d - 1` must not overflow the computation type.
// Negative or floating-point arguments are evaluated exactly as written and do
// not produce a true ceiling in general.
//
// constexpr already implies inline, so no separate `inline` is needed.
template<typename T1, typename T2>
__any_device__
constexpr auto div_up(T1 n, T2 d)
{
    return (n + d - 1) / d;
}