/** Copyright (c) 2022 NVIDIA CORPORATION.  All rights reserved.
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto.  Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 */

#pragma once

// This file declares a subset of the C runtime (CRT) functions and macros for
// use by compute kernel modules. There are three environments in which this
// file gets included:
// - CUDA kernel modules (WP_NO_CRT and __CUDACC__). CUDA already has implicitly
//   declared builtins for most functions. printf() and macro definitions are
//   the notable exceptions.
// - C++ kernel modules (WP_NO_CRT and !__CUDACC__). These can't use the CRT
//   directly when using a standalone compiler. The functions get obtained from
//   the compiler library instead (clang.dll).
// - Warp runtime (!WP_NO_CRT). When building warp.dll it's fine to include the
//   standard C library headers, and it avoids mismatched redefinitions.

// WP_API: export/visibility decoration applied to API symbols.
// - CUDA device compilation pass (__CUDA_ARCH__ defined): expands to nothing.
// - Windows hosts: __declspec(dllexport).
// - Other hosts: default symbol visibility.
#if defined(__CUDA_ARCH__)
    #define WP_API
#elif defined(_WIN32)
    #define WP_API __declspec(dllexport)
#else
    #define WP_API __attribute__ ((visibility ("default")))
#endif

#ifndef __CUDA_ARCH__

// Helper functions with C linkage; they are only declared outside of the
// CUDA device compilation pass.
extern "C" {

// Helper for implementing the assert() macro: receives the failure message,
// the source file name and the line number.
WP_API void _wp_assert(const char* message, const char* file, unsigned int line);

// Helper for implementing isfinite(); the result is returned as an int.
WP_API int _wp_isfinite(double);

}  // extern "C"

#endif  // !__CUDA_ARCH__

#if !defined(WP_NO_CRT)

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>
#include <float.h>
#include <string.h>

#else

// These definitions are taken from Jitify: https://github.com/NVIDIA/jitify

/// float.h
// Characteristics of the IEEE-754 binary32 (float) and binary64 (double)
// formats. Negative values are parenthesized so that the macros expand
// safely inside larger expressions.
#define FLT_RADIX       2
#define FLT_MANT_DIG    24
#define DBL_MANT_DIG    53
#define FLT_DIG         6
#define DBL_DIG         15
#define FLT_MIN_EXP     (-125)
#define DBL_MIN_EXP     (-1021)
#define FLT_MIN_10_EXP  (-37)
#define DBL_MIN_10_EXP  (-307)
#define FLT_MAX_EXP     128
#define DBL_MAX_EXP     1024
#define FLT_MAX_10_EXP  38
#define DBL_MAX_10_EXP  308
#define FLT_MAX         3.4028234e38f
#define DBL_MAX         1.7976931348623157e308
// Machine epsilon: the gap between 1.0 and the next representable value,
// i.e. 2^-23 for float and 2^-52 for double.
#define FLT_EPSILON     1.19209289e-7f
#define DBL_EPSILON     2.2204460492503131e-16
// Smallest positive normalized values.
#define FLT_MIN         1.1754943e-38f
#define DBL_MIN         2.2250738585072013e-308
// 1 == round to nearest.
#define FLT_ROUNDS      1
#if defined __cplusplus && __cplusplus >= 201103L
#define FLT_EVAL_METHOD 0
#define DECIMAL_DIG     21
#endif

/// limits.h
// __WORDSIZE is used below only to select the width of `long`:
// - Windows uses the LLP64 model, so `long` stays 32 bits wide even on
//   64-bit targets.
// - LP64 targets (e.g. x86-64 and AArch64 on Linux/macOS) have a 64-bit
//   `long`. GCC and Clang predefine __LP64__ on such targets; the explicit
//   architecture checks are kept as a fallback.
#if defined _WIN32 || defined _WIN64
 #define __WORDSIZE 32
#else
 #if defined __LP64__ || ((defined __x86_64__ || defined __aarch64__) && !defined __ILP32__)
  #define __WORDSIZE 64
 #else
  #define __WORDSIZE 32
 #endif
#endif
#define MB_LEN_MAX  16
#define CHAR_BIT    8
#define SCHAR_MIN   (-128)
#define SCHAR_MAX   127
#define UCHAR_MAX   255
// The signedness of plain `char` is implementation-defined, so CHAR_MIN and
// CHAR_MAX are computed by the compiler instead of being hard-coded.
enum {
  _JITIFY_CHAR_IS_UNSIGNED = (char)-1 >= 0,
  CHAR_MIN = _JITIFY_CHAR_IS_UNSIGNED ? 0 : SCHAR_MIN,
  CHAR_MAX = _JITIFY_CHAR_IS_UNSIGNED ? UCHAR_MAX : SCHAR_MAX,
};
#define SHRT_MIN    (-32768)
#define SHRT_MAX    32767
#define USHRT_MAX   65535
// INT_MIN refers to INT_MAX, which is defined on the next line; this is fine
// because macros are only expanded at the point of use.
#define INT_MIN     (-INT_MAX - 1)
#define INT_MAX     2147483647
#define UINT_MAX    4294967295U
#if __WORDSIZE == 64
 # define LONG_MAX  9223372036854775807L
#else
 # define LONG_MAX  2147483647L
#endif
#define LONG_MIN    (-LONG_MAX - 1L)
#if __WORDSIZE == 64
 #define ULONG_MAX  18446744073709551615UL
#else
 #define ULONG_MAX  4294967295UL
#endif
#define LLONG_MAX  9223372036854775807LL
#define LLONG_MIN  (-LLONG_MAX - 1LL)
#define ULLONG_MAX 18446744073709551615ULL

// DBL_MAX * DBL_MAX overflows the double range; the product is then
// converted to float to form INFINITY.
#define INFINITY   ((float)(DBL_MAX * DBL_MAX))
#define HUGE_VAL   ((double)INFINITY)
#define HUGE_VALF  ((float)INFINITY)

/// stdint.h
typedef signed char      int8_t;
typedef signed short     int16_t;
typedef signed int       int32_t;
typedef signed long long int64_t;
//typedef signed char      int_fast8_t;
//typedef signed short     int_fast16_t;
//typedef signed int       int_fast32_t;
//typedef signed long long int_fast64_t;
//typedef signed char      int_least8_t;
//typedef signed short     int_least16_t;
//typedef signed int       int_least32_t;
//typedef signed long long int_least64_t;
//typedef signed long long intmax_t;
//typedef signed long      intptr_t; 
typedef unsigned char      uint8_t;
typedef unsigned short     uint16_t;
typedef unsigned int       uint32_t;
typedef unsigned long long uint64_t;
//typedef unsigned char      uint_fast8_t;
//typedef unsigned short     uint_fast16_t;
//typedef unsigned int       uint_fast32_t;
//typedef unsigned long long uint_fast64_t;
//typedef unsigned char      uint_least8_t;
//typedef unsigned short     uint_least16_t;
//typedef unsigned int       uint_least32_t;
//typedef unsigned long long uint_least64_t;
//typedef unsigned long long uintmax_t;


/// math.h

// #if __cplusplus >= 201103L
// #define DEFINE_MATH_UNARY_FUNC_WRAPPER(f) \
// 	inline double      f(double x)         { return ::f(x); } \
// 	inline float       f##f(float x)       { return ::f(x); } \
// 	/*inline long double f##l(long double x) { return ::f(x); }*/ \
// 	inline float       f(float x)          { return ::f(x); } \
// 	/*inline long double f(long double x)    { return ::f(x); }*/
// #else
// #define DEFINE_MATH_UNARY_FUNC_WRAPPER(f) \
// 	inline double      f(double x)         { return ::f(x); } \
// 	inline float       f##f(float x)       { return ::f(x); } \
// 	/*inline long double f##l(long double x) { return ::f(x); }*/
// #endif
// DEFINE_MATH_UNARY_FUNC_WRAPPER(cos)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(sin)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(tan)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(acos)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(asin)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(atan)
// template<typename T> inline T atan2(T y, T x) { return ::atan2(y, x); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(cosh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(sinh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(tanh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(exp)
// template<typename T> inline T frexp(T x, int* exp) { return ::frexp(x, exp); }
// template<typename T> inline T ldexp(T x, int  exp) { return ::ldexp(x, exp); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(log)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(log10)
// template<typename T> inline T modf(T x, T* intpart) { return ::modf(x, intpart); }
// template<typename T> inline T pow(T x, T y) { return ::pow(x, y); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(sqrt)
// template<typename T> inline T fmod(T n, T d) { return ::fmod(n, d); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(fabs)
// template<typename T> inline T abs(T x) { return ::abs(x); }
// #if __cplusplus >= 201103L
// DEFINE_MATH_UNARY_FUNC_WRAPPER(acosh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(asinh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(atanh)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(exp2)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(expm1)
// template<typename T> inline int ilogb(T x) { return ::ilogb(x); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(log1p)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(log2)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(logb)
// template<typename T> inline T scalbn (T x, int n)  { return ::scalbn(x, n); }
// template<typename T> inline T scalbln(T x, long n) { return ::scalbn(x, n); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(cbrt)
// template<typename T> inline T hypot(T x, T y) { return ::hypot(x, y); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(erf)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(erfc)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(tgamma)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(lgamma)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(round)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(rint)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(trunc)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(floor)
// DEFINE_MATH_UNARY_FUNC_WRAPPER(ceil)
// template<typename T> inline long lround(T x) { return ::lround(x); }
// template<typename T> inline long long llround(T x) { return ::llround(x); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(rint)
// template<typename T> inline long lrint(T x) { return ::lrint(x); }
// template<typename T> inline long long llrint(T x) { return ::llrint(x); }
// DEFINE_MATH_UNARY_FUNC_WRAPPER(nearbyint)
// //DEFINE_MATH_UNARY_FUNC_WRAPPER(isfinite)
// // TODO: remainder, remquo, copysign, nan, nextafter, nexttoward, fdim,
// // fmax, fmin, fma
// #endif
// #undef DEFINE_MATH_UNARY_FUNC_WRAPPER

// Pi as a double-precision literal. Defined here because the standard
// headers are not included in WP_NO_CRT builds.
#define M_PI 3.14159265358979323846

#if defined(__CUDACC__)

#if defined(__clang__)
// When compiling CUDA with barebones Clang we need to define its builtins and runtime functions ourselves.
#include "cuda_crt.h"
#endif

#else

// Hand-written declarations of the C runtime functions used by C++ kernel
// modules, which cannot include the standard headers (see the notes at the
// top of this file). All of them have C linkage.
extern "C" {

// stdio.h
int printf(const char * format, ... );

// stdlib.h
int abs(int);
long long llabs(long long);

// math.h
// Each function is declared in its float (f-suffixed) and double flavor.
float fmodf(float, float);
double fmod(double, double);
float logf(float);
double log(double);
float log2f(float);
double log2(double);
float log10f(float);
double log10(double);
float expf(float);
double exp(double);
float sqrtf(float);
double sqrt(double);
float cbrtf(float);
double cbrt(double);
float powf(float, float);
double pow(double, double);
float floorf(float);
double floor(double);
float ceilf(float);
double ceil(double);
float fabsf(float);
double fabs(double);
float roundf(float);
double round(double);
float truncf(float);
double trunc(double);
float rintf(float);
double rint(double);
float acosf(float);
double acos(double);
float asinf(float);
double asin(double);
float atanf(float);
double atan(double);
float atan2f(float, float);
double atan2(double, double);
float cosf(float);
double cos(double);
float sinf(float);
double sin(double);
float tanf(float);
double tan(double);
float sinhf(float);
double sinh(double);
float coshf(float);
double cosh(double);
float tanhf(float);
double tanh(double);
// Only the float flavor of fma is declared.
float fmaf(float, float, float);

// stddef.h
// size_t is, by definition, the type of the result of sizeof. Deriving it
// from the compiler keeps the alias correct on every target (it is the same
// type as before on 64-bit Windows and LP64 platforms) and guarantees that
// the declarations below match the compiler's own notion of size_t.
using size_t = decltype(sizeof(0));

// string.h
void* memset(void*, int, size_t);
void* memcpy(void*, const void*, size_t);

// stdlib.h
void* malloc(size_t);
void free(void*);

}  // extern "C"

// cmath
// Forwards x to the _wp_isfinite() helper and converts its int result to
// bool (any non-zero value becomes true).
inline bool isfinite(double x)
{
    const int finite = _wp_isfinite(x);
    return finite != 0;
}

// assert.h
// With NDEBUG defined, assert() expands to a no-op and the expression is not
// evaluated. Otherwise the expression is evaluated once; when it is false,
// _wp_assert() is called with the stringified expression, the current file
// and the current line.
#if !defined(NDEBUG)
    #define assert(expression)                                                  \
        ((expression)                                                           \
            ? (void)0                                                           \
            : _wp_assert((#expression), (__FILE__), (unsigned)(__LINE__)))
#else
    #define assert(expression) ((void)0)
#endif

#endif  // !__CUDACC__

#endif // WP_NO_CRT