/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved. * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation * and any modifications thereto. Any use, reproduction, disclosure or * distribution of this software and related documentation without an express * license agreement from NVIDIA CORPORATION is strictly prohibited. */ #pragma once // This file declares a subset of the C runtime (CRT) functions and macros for // use by compute kernel modules. There are three environments in which this // file gets included: // - CUDA kernel modules (WP_NO_CRT and __CUDACC__). CUDA already has implicitly // declared builtins for most functions. printf() and macro definitions are // the notable exceptions. // - C++ kernel modules (WP_NO_CRT and !__CUDACC__). These can't use the CRT // directly when using a standalone compiler. The functions get obtained from // the compiler library instead (clang.dll). // - Warp runtime (!WP_NO_CRT). When building warp.dll it's fine to include the // standard C library headers, and it avoids mismatched redefinitions. 
// Export/visibility decoration for symbols provided by the Warp runtime.
// Empty when compiling device code (__CUDA_ARCH__ defined).
#if !defined(__CUDA_ARCH__)
    #if defined(_WIN32)
        #define WP_API __declspec(dllexport)
    #else
        #define WP_API __attribute__ ((visibility ("default")))
    #endif
#else
    #define WP_API
#endif

#if !defined(__CUDA_ARCH__)

// Helper for implementing assert() macro
extern "C" WP_API void _wp_assert(const char* message, const char* file, unsigned int line);

// Helper for implementing isfinite()
extern "C" WP_API int _wp_isfinite(double);

#endif  // !__CUDA_ARCH__

#if !defined(WP_NO_CRT)

// Standard C headers covering everything the WP_NO_CRT branch below declares by hand.
#include <assert.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#else

// These definitions are taken from Jitify: https://github.com/NVIDIA/jitify

/// float.h
#define FLT_RADIX       2
#define FLT_MANT_DIG    24
#define DBL_MANT_DIG    53
#define FLT_DIG         6
#define DBL_DIG         15
// Negative values are parenthesized (like SCHAR_MIN/SHRT_MIN below) so they
// stay a single operand wherever the macro is expanded.
#define FLT_MIN_EXP     (-125)
#define DBL_MIN_EXP     (-1021)
#define FLT_MIN_10_EXP  (-37)
#define DBL_MIN_10_EXP  (-307)
#define FLT_MAX_EXP     128
#define DBL_MAX_EXP     1024
#define FLT_MAX_10_EXP  38
#define DBL_MAX_10_EXP  308
#define FLT_MAX         3.4028234e38f
#define DBL_MAX         1.7976931348623157e308
#define FLT_EPSILON     1.19209289e-7f
// 2^-52; the value previously defined here was missing a digit.
#define DBL_EPSILON     2.2204460492503131e-16
#define FLT_MIN         1.1754943e-38f
#define DBL_MIN         2.2250738585072013e-308
#define FLT_ROUNDS      1
#if defined __cplusplus && __cplusplus >= 201103L
#define FLT_EVAL_METHOD 0
#define DECIMAL_DIG     21
#endif

/// limits.h
// __WORDSIZE is only used below to select the width of `long` for
// LONG_MAX / ULONG_MAX. Windows keeps a 32-bit `long` even on 64-bit targets.
// Elsewhere, any LP64 target (x86_64, aarch64, ...) has a 64-bit `long`;
// GCC and Clang predefine __LP64__ on those targets.
#if defined _WIN32 || defined _WIN64
 #define __WORDSIZE 32
#else
 #if defined(__LP64__) || (defined __x86_64__ && !defined __ILP32__)
  #define __WORDSIZE 64
 #else
  #define __WORDSIZE 32
 #endif
#endif
#define MB_LEN_MAX  16
#define CHAR_BIT    8
#define SCHAR_MIN   (-128)
#define SCHAR_MAX   127
#define UCHAR_MAX   255
// CHAR_MIN / CHAR_MAX depend on whether plain `char` is signed for this target.
enum {
  _JITIFY_CHAR_IS_UNSIGNED = (char)-1 >= 0,
  CHAR_MIN = _JITIFY_CHAR_IS_UNSIGNED ? 0 : SCHAR_MIN,
  CHAR_MAX = _JITIFY_CHAR_IS_UNSIGNED ? UCHAR_MAX : SCHAR_MAX,
};
#define SHRT_MIN    (-32768)
#define SHRT_MAX    32767
#define USHRT_MAX   65535
#define INT_MIN     (-INT_MAX - 1)
#define INT_MAX     2147483647
#define UINT_MAX    4294967295U
#if __WORDSIZE == 64
 # define LONG_MAX  9223372036854775807L
#else
 # define LONG_MAX  2147483647L
#endif
#define LONG_MIN    (-LONG_MAX - 1L)
#if __WORDSIZE == 64
 #define ULONG_MAX  18446744073709551615UL
#else
 #define ULONG_MAX  4294967295UL
#endif
#define LLONG_MAX  9223372036854775807LL
#define LLONG_MIN  (-LLONG_MAX - 1LL)
#define ULLONG_MAX 18446744073709551615ULL

#define INFINITY ((float)(DBL_MAX * DBL_MAX))
#define HUGE_VAL ((double)INFINITY)
#define HUGE_VALF ((float)INFINITY)

/// stdint.h
typedef signed char      int8_t;
typedef signed short     int16_t;
typedef signed int       int32_t;
typedef signed long long int64_t;
//typedef signed char      int_fast8_t;
//typedef signed short     int_fast16_t;
//typedef signed int       int_fast32_t;
//typedef signed long long int_fast64_t;
//typedef signed char      int_least8_t;
//typedef signed short     int_least16_t;
//typedef signed int       int_least32_t;
//typedef signed long long int_least64_t;
//typedef signed long long intmax_t;
//typedef signed long      intptr_t;
typedef unsigned char      uint8_t;
typedef unsigned short     uint16_t;
typedef unsigned int       uint32_t;
typedef unsigned long long uint64_t;
//typedef unsigned char      uint_fast8_t;
//typedef unsigned short     uint_fast16_t;
//typedef unsigned int       uint_fast32_t;
//typedef unsigned long long uint_fast64_t;
//typedef unsigned char      uint_least8_t;
//typedef unsigned short     uint_least16_t;
//typedef unsigned int       uint_least32_t;
//typedef unsigned long long uint_least64_t;
//typedef unsigned long long uintmax_t;

/// math.h

// #if __cplusplus >= 201103L
// #define DEFINE_MATH_UNARY_FUNC_WRAPPER(f) \
//         inline double      f(double x)         { return ::f(x); } \
//         inline float       f##f(float x)       { return ::f(x); } \
//         /*inline long double f##l(long double x) { return ::f(x); }*/ \
//         inline float       f(float x)          { return ::f(x); } \
//         /*inline long double f(long double x)    { return ::f(x); }*/
// #else
// #define DEFINE_MATH_UNARY_FUNC_WRAPPER(f) \
//         inline double      f(double x)         { return ::f(x); } \
//         inline float       f##f(float x)       { return ::f(x); } \
//         /*inline long double f##l(long double x) { return ::f(x); }*/
// #endif
// DEFINE_MATH_UNARY_FUNC_WRAPPER(cos)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(sin)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(tan)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(acos)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(asin)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(atan)
// template <typename T> inline T atan2(T y, T x) { return ::atan2(y, x); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(cosh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(sinh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(tanh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(exp)
// template <typename T> inline T frexp(T x, int* exp) { return ::frexp(x, exp); }
// template <typename T> inline T ldexp(T x, int  exp) { return ::ldexp(x, exp); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(log)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(log10)
// template <typename T> inline T modf(T x, T* intpart) { return ::modf(x, intpart); }
// template <typename T> inline T pow(T x, T y) { return ::pow(x, y); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(sqrt)
// template <typename T> inline T fmod(T n, T d) { return ::fmod(n, d); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(fabs)
// template <typename T> inline T abs(T x) { return ::abs(x); }
// #if __cplusplus >= 201103L
// DEFINE_MATH_UNARY_FUNC_WRAPPER(acosh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(asinh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(atanh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(exp2)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(expm1)
// template <typename T> inline int ilogb(T x) { return ::ilogb(x); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(log1p)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(log2)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(logb)
// template <typename T> inline T scalbn (T x, int n)  { return ::scalbn(x, n); }
// template <typename T> inline T scalbln(T x, long n) { return ::scalbn(x, n); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(cbrt)
// template <typename T> inline T hypot(T x, T y) { return ::hypot(x, y); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(erf)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(erfc)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(tgamma)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(lgamma)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(round)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(rint)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(trunc)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(floor)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(ceil)
// template <typename T> inline long lround(T x) { return ::lround(x); }
// template <typename T> inline long long llround(T x) { return ::llround(x); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(rint)
// template <typename T> inline long lrint(T x) { return ::lrint(x); }
// template <typename T> inline long long llrint(T x) { return ::llrint(x); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(nearbyint)
// //DEFINE_MATH_UNARY_FUNC_WRAPPER(isfinite)
// // TODO: remainder, remquo, copysign, nan, nextafter, nexttoward, fdim,
// // fmax, fmin, fma
// #endif
// #undef DEFINE_MATH_UNARY_FUNC_WRAPPER

#define M_PI 3.14159265358979323846

#if defined(__CUDACC__)

#if defined(__clang__)
// When compiling CUDA with barebones Clang we need to define its builtins and runtime functions ourselves.
#include "cuda_crt.h"
#endif

#else

// Hand-written prototypes for the C library functions used when no CRT
// headers are available (WP_NO_CRT and not compiling as CUDA).
extern "C" {

// stdio.h
int printf(const char * format, ... );

// stdlib.h
int abs(int);
long long llabs(long long);

// math.h
float fmodf(float, float);
double fmod(double, double);
float logf(float);
double log(double);
float log2f(float);
double log2(double);
float log10f(float);
double log10(double);
float expf(float);
double exp(double);
float sqrtf(float);
double sqrt(double);
float cbrtf(float);
double cbrt(double);
float powf(float, float);
double pow(double, double);
float floorf(float);
double floor(double);
float ceilf(float);
double ceil(double);
float fabsf(float);
double fabs(double);
float roundf(float);
double round(double);
float truncf(float);
double trunc(double);
float rintf(float);
double rint(double);
float acosf(float);
double acos(double);
float asinf(float);
double asin(double);
float atanf(float);
double atan(double);
float atan2f(float, float);
double atan2(double, double);
float cosf(float);
double cos(double);
float sinf(float);
double sin(double);
float tanf(float);
double tan(double);
float sinhf(float);
double sinh(double);
float coshf(float);
double cosh(double);
float tanhf(float);
double tanh(double);
float fmaf(float, float, float);

// stddef.h
#if defined(_WIN32)
using size_t = unsigned __int64;
#else
using size_t = unsigned long;
#endif

// string.h
void* memset(void*, int, size_t);
void* memcpy(void*, const void*, size_t);

// stdlib.h
void* malloc(size_t);
void free(void*);

}  // extern "C"

// cmath
// Forwards to the runtime helper; its int result is converted to bool.
inline bool isfinite(double x)
{
    return _wp_isfinite(x) != 0;
}

// assert.h
// With NDEBUG the expression is not evaluated. Otherwise a false expression
// calls _wp_assert() with the stringified expression, file and line.
#ifdef NDEBUG
    #define assert(expression) ((void)0)
#else
    #define assert(expression) (void)(                                                  \
            (!!(expression)) ||                                                         \
            (_wp_assert((#expression), (__FILE__), (unsigned)(__LINE__)), 0)            \
        )
#endif

#endif  // !__CUDACC__

#endif  // WP_NO_CRT