// This code contains NVIDIA Confidential Information and is disclosed to you // under a form of NVIDIA software license agreement provided separately to you. // // Notice // NVIDIA Corporation and its licensors retain all intellectual property and // proprietary rights in and to this software and related documentation and // any modifications thereto. Any use, reproduction, disclosure, or // distribution of this software and related documentation without an express // license agreement from NVIDIA Corporation is strictly prohibited. // // ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES // NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO // THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, // MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. // // Information and code furnished is believed to be accurate and reliable. // However, NVIDIA Corporation assumes no responsibility for the consequences of use of such // information or for any infringement of patents or other rights of third parties that may // result from its use. No license is granted by implication or otherwise under any patent // or patent rights of NVIDIA Corporation. Details are subject to change without notice. // This code supersedes and replaces all information previously supplied. // NVIDIA Corporation products are not authorized for use as critical // components in life support devices or systems without express written approval of // NVIDIA Corporation. // // Copyright (c) 20132017 NVIDIA Corporation. All rights reserved. #include #include #include #include #include #include "../../core/core.h" #include "../../core/maths.h" #include "../../include/NvFlex.h" #include "../../include/NvFlexExt.h" #define CudaCheck(x) { cudaError_t err = x; if (err != cudaSuccess) { printf("Cuda error: %d in %s at %s:%d\n", err, #x, __FILE__, __LINE__); assert(0); } } static const int kNumThreadsPerBlock = 256; struct NvFlexExtForceFieldCallback { NvFlexExtForceFieldCallback(NvFlexSolver* solver) : mSolver(solver) { // force fields mForceFieldsCpu = NULL; mForceFieldsGpu = NULL; mMaxForceFields = 0; mNumForceFields = 0; } ~NvFlexExtForceFieldCallback() { // force fields CudaCheck(cudaFreeHost(mForceFieldsCpu)); CudaCheck(cudaFree(mForceFieldsGpu)); } NvFlexExtForceField* mForceFieldsCpu; // pinned host copy for async transfer NvFlexExtForceField* mForceFieldsGpu; // device copy int mMaxForceFields; int mNumForceFields; NvFlexSolver* mSolver; }; NvFlexExtForceFieldCallback* NvFlexExtCreateForceFieldCallback(NvFlexSolver* solver) { return new NvFlexExtForceFieldCallback(solver); } void NvFlexExtDestroyForceFieldCallback(NvFlexExtForceFieldCallback* callback) { delete callback; } __global__ void UpdateForceFields(int numParticles, const Vec4* __restrict__ positions, Vec4* __restrict__ velocities, const NvFlexExtForceField* __restrict__ forceFields, int numForceFields, float dt) { const int i = blockIdx.x*blockDim.x + threadIdx.x; for (int f = 0; f < numForceFields; f++) { const NvFlexExtForceField& forceField = forceFields[f]; if (i < numParticles) { const int index = i; Vec4 p = positions[index]; Vec3 v = Vec3(velocities[index]); Vec3 localPos = Vec3(p.x, p.y, p.z) - Vec3(forceField.mPosition[0], forceField.mPosition[1], forceField.mPosition[2]); float length = Length(localPos); if (length >= forceField.mRadius) { continue; } Vec3 fieldDir; if (length > 0.0f) { fieldDir = localPos / length; } else { fieldDir = localPos; } // If using linear falloff, scale with distance. float fieldStrength = forceField.mStrength; if (forceField.mLinearFalloff) { fieldStrength *= (1.0f - (length / forceField.mRadius)); } // Apply force Vec3 force = localPos * fieldStrength; float unitMultiplier; if (forceField.mMode == eNvFlexExtModeForce) { unitMultiplier = dt * p.w; // time/mass } else if (forceField.mMode == eNvFlexExtModeImpulse) { unitMultiplier = p.w; // 1/mass } else if (forceField.mMode == eNvFlexExtModeVelocityChange) { unitMultiplier = 1.0f; } Vec3 deltaVelocity = fieldDir * fieldStrength * unitMultiplier; velocities[index] = Vec4(v + deltaVelocity, 0.0f); } } } void ApplyForceFieldsCallback(NvFlexSolverCallbackParams params) { // callbacks always have the correct CUDA device set so we can safely launch kernels without acquiring NvFlexExtForceFieldCallback* c = (NvFlexExtForceFieldCallback*)params.userData; if (params.numActive && c->mNumForceFields) { const int kNumBlocks = (params.numActive+kNumThreadsPerBlock-1)/kNumThreadsPerBlock; UpdateForceFields<<>>( params.numActive, (Vec4*)params.particles, (Vec4*)params.velocities, c->mForceFieldsGpu, c->mNumForceFields, params.dt); } } void NvFlexExtSetForceFields(NvFlexExtForceFieldCallback* c, const NvFlexExtForceField* forceFields, int numForceFields) { // re-alloc if necessary if (numForceFields > c->mMaxForceFields) { CudaCheck(cudaFreeHost(c->mForceFieldsCpu)); CudaCheck(cudaMallocHost(&c->mForceFieldsCpu, sizeof(NvFlexExtForceField)*numForceFields)); CudaCheck(cudaFree(c->mForceFieldsGpu)); CudaCheck(cudaMalloc(&c->mForceFieldsGpu, sizeof(NvFlexExtForceField)*numForceFields)); c->mMaxForceFields = numForceFields; } c->mNumForceFields = numForceFields; if (numForceFields > 0) { // copy to pinned host memory memcpy(c->mForceFieldsCpu, forceFields, numForceFields*sizeof(NvFlexExtForceField)); cudaMemcpyKind kind = cudaMemcpyHostToDevice; CudaCheck(cudaMemcpyAsync(c->mForceFieldsGpu, &c->mForceFieldsCpu[0], numForceFields*sizeof(NvFlexExtForceField), kind, 0)); } NvFlexSolverCallback callback; callback.function = ApplyForceFieldsCallback; callback.userData = c; // register a callback to calculate the forces at the end of the time-step NvFlexRegisterSolverCallback(c->mSolver, callback, eNvFlexStageUpdateEnd); }