# Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import unittest
import numpy as np
import warp as wp
from warp.tests.unittest_utils import *
wp.init()
np_signed_int_types = [
np.int8,
np.int16,
np.int32,
np.int64,
np.byte,
]
np_unsigned_int_types = [
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.ubyte,
]
np_int_types = np_signed_int_types + np_unsigned_int_types
np_float_types = [np.float16, np.float32, np.float64]
np_scalar_types = np_int_types + np_float_types
def randvals(rng, shape, dtype):
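    """Return random test values of the given shape and dtype.

    Floats are drawn from a standard normal distribution; integer types are
    drawn from [1, 3) for 8-bit types and [1, 5) otherwise, so integer
    values are always positive and small.
    """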
if dtype in np_float_types:
return rng.standard_normal(size=shape).astype(dtype)
elif dtype in [np.int8, np.uint8, np.byte, np.ubyte]:
return rng.integers(1, high=3, size=shape, dtype=dtype)
return rng.integers(1, high=5, size=shape, dtype=dtype)
kernel_cache = dict()
def getkernel(func, suffix=""):
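    """Build a wp.Kernel from func, caching it by function name plus suffix.

    The suffix (typically a dtype name) lets the same Python function be
    registered once per scalar type without key collisions.
    """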
key = func.__name__ + "_" + suffix
if key not in kernel_cache:
kernel_cache[key] = wp.Kernel(func=func, key=key)
return kernel_cache[key]
def get_select_kernel(dtype):
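    """Return a kernel that copies input[index] into out[0].

    Used to select a single scalar from a kernel's output array so it can
    serve as the loss for tape.backward().
    """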
def output_select_kernel_fn(
input: wp.array(dtype=dtype),
index: int,
out: wp.array(dtype=dtype),
):
out[0] = input[index]
return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
def get_select_kernel2(dtype):
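    """2D variant of get_select_kernel: copies input[index0, index1] into out[0]."""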
def output_select_kernel2_fn(
input: wp.array(dtype=dtype, ndim=2),
index0: int,
index1: int,
out: wp.array(dtype=dtype),
):
out[0] = input[index0, index1]
return getkernel(output_select_kernel2_fn, suffix=dtype.__name__)
def test_arrays(test, device, dtype):
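    """Check that values survive a round trip through wp.array() and .numpy()."""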
rng = np.random.default_rng(123)
tol = {
np.float16: 1.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
arr_np = randvals(rng, (10, 5), dtype)
arr = wp.array(arr_np, dtype=wptype, requires_grad=True, device=device)
assert_np_equal(arr.numpy(), arr_np, tol=tol)
def test_unary_ops(test, device, dtype, register_kernels=False):
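    """Test +x, -x, sign(), abs() and step(), including their gradients for float types."""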
rng = np.random.default_rng(123)
tol = {
np.float16: 5.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
def check_unary(
inputs: wp.array(dtype=wptype, ndim=2),
outputs: wp.array(dtype=wptype, ndim=2),
):
for i in range(10):
i0 = inputs[0, i]
i1 = inputs[1, i]
i2 = inputs[2, i]
i3 = inputs[3, i]
i4 = inputs[4, i]
# multiply outputs by 2 so we've got something to backpropagate:
outputs[0, i] = wptype(2.0) * (+i0)
outputs[1, i] = wptype(2.0) * (-i1)
outputs[2, i] = wptype(2.0) * wp.sign(i2)
outputs[3, i] = wptype(2.0) * wp.abs(i3)
outputs[4, i] = wptype(2.0) * wp.step(i4)
kernel = getkernel(check_unary, suffix=dtype.__name__)
output_select_kernel = get_select_kernel2(wptype)
if register_kernels:
return
if dtype in np_float_types:
inputs = wp.array(
rng.standard_normal(size=(5, 10)).astype(dtype), dtype=wptype, requires_grad=True, device=device
)
else:
inputs = wp.array(
rng.integers(-2, high=3, size=(5, 10), dtype=dtype), dtype=wptype, requires_grad=True, device=device
)
outputs = wp.zeros_like(inputs)
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
assert_np_equal(outputs.numpy()[0], 2 * inputs.numpy()[0], tol=tol)
assert_np_equal(outputs.numpy()[1], -2 * inputs.numpy()[1], tol=tol)
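    # wp.sign() returns 1 at zero, whereas np.sign(0) == 0, so patch the
    # numpy reference accordingly: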
expected = 2 * np.sign(inputs.numpy()[2])
expected[expected == 0] = 2
assert_np_equal(outputs.numpy()[2], expected, tol=tol)
assert_np_equal(outputs.numpy()[3], 2 * np.abs(inputs.numpy()[3]), tol=tol)
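    # wp.step(x) returns 1.0 for x < 0.0 and 0.0 otherwise, which matches
    # 1 - np.heaviside(x, 1):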
assert_np_equal(outputs.numpy()[4], 2 * (1 - np.heaviside(inputs.numpy()[4], 1)), tol=tol)
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
if dtype in np_float_types:
for i in range(10):
# grad of 2x:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
tape.backward(loss=out)
expected_grads = np.zeros_like(inputs.numpy())
expected_grads[0, i] = 2
assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
tape.zero()
# grad of -2x:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
tape.backward(loss=out)
expected_grads = np.zeros_like(inputs.numpy())
expected_grads[1, i] = -2
assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
tape.zero()
# grad of 2 * sign(x):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
tape.backward(loss=out)
expected_grads = np.zeros_like(inputs.numpy())
assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
tape.zero()
# grad of 2 * abs(x):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
tape.backward(loss=out)
expected_grads = np.zeros_like(inputs.numpy())
expected_grads[3, i] = 2 * np.sign(inputs.numpy()[3, i])
assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
tape.zero()
# grad of 2 * step(x):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
tape.backward(loss=out)
expected_grads = np.zeros_like(inputs.numpy())
assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
tape.zero()
def test_nonzero(test, device, dtype, register_kernels=False):
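    """Test wp.nonzero(); its gradient should be zero everywhere."""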
rng = np.random.default_rng(123)
tol = {
np.float16: 5.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
def check_nonzero(
inputs: wp.array(dtype=wptype),
outputs: wp.array(dtype=wptype),
):
for i in range(10):
i0 = inputs[i]
outputs[i] = wp.nonzero(i0)
kernel = getkernel(check_nonzero, suffix=dtype.__name__)
output_select_kernel = get_select_kernel(wptype)
if register_kernels:
return
inputs = wp.array(rng.integers(-2, high=3, size=10).astype(dtype), dtype=wptype, requires_grad=True, device=device)
outputs = wp.zeros_like(inputs)
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
assert_np_equal(outputs.numpy(), (inputs.numpy() != 0))
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
if dtype in np_float_types:
for i in range(10):
# grad should just be zero:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[out], device=device)
tape.backward(loss=out)
expected_grads = np.zeros_like(inputs.numpy())
assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
tape.zero()
def test_binary_ops(test, device, dtype, register_kernels=False):
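    """Test mul, div, add, sub, mod, min, max and floordiv against numpy
    references, including their gradients for float types."""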
rng = np.random.default_rng(123)
tol = {
np.float16: 5.0e-2,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
def check_binary_ops(
in1: wp.array(dtype=wptype, ndim=2),
in2: wp.array(dtype=wptype, ndim=2),
outputs: wp.array(dtype=wptype, ndim=2),
):
for i in range(10):
i0 = in1[0, i]
i1 = in1[1, i]
i2 = in1[2, i]
i3 = in1[3, i]
i4 = in1[4, i]
i5 = in1[5, i]
i6 = in1[6, i]
i7 = in1[7, i]
j0 = in2[0, i]
j1 = in2[1, i]
j2 = in2[2, i]
j3 = in2[3, i]
j4 = in2[4, i]
j5 = in2[5, i]
j6 = in2[6, i]
j7 = in2[7, i]
outputs[0, i] = wptype(2) * wp.mul(i0, j0)
outputs[1, i] = wptype(2) * wp.div(i1, j1)
outputs[2, i] = wptype(2) * wp.add(i2, j2)
outputs[3, i] = wptype(2) * wp.sub(i3, j3)
outputs[4, i] = wptype(2) * wp.mod(i4, j4)
outputs[5, i] = wptype(2) * wp.min(i5, j5)
outputs[6, i] = wptype(2) * wp.max(i6, j6)
outputs[7, i] = wptype(2) * wp.floordiv(i7, j7)
kernel = getkernel(check_binary_ops, suffix=dtype.__name__)
output_select_kernel = get_select_kernel2(wptype)
if register_kernels:
return
vals1 = randvals(rng, [8, 10], dtype)
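    # in2 is used as a divisor below, so make sure its entries are positive: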
    if dtype in np_unsigned_int_types:
vals2 = vals1 + randvals(rng, [8, 10], dtype)
else:
vals2 = np.abs(randvals(rng, [8, 10], dtype))
in1 = wp.array(vals1, dtype=wptype, requires_grad=True, device=device)
in2 = wp.array(vals2, dtype=wptype, requires_grad=True, device=device)
outputs = wp.zeros_like(in1)
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
assert_np_equal(outputs.numpy()[0], 2 * in1.numpy()[0] * in2.numpy()[0], tol=tol)
if dtype in np_float_types:
assert_np_equal(outputs.numpy()[1], 2 * in1.numpy()[1] / (in2.numpy()[1]), tol=tol)
else:
assert_np_equal(outputs.numpy()[1], 2 * (in1.numpy()[1] // (in2.numpy()[1])), tol=tol)
assert_np_equal(outputs.numpy()[2], 2 * (in1.numpy()[2] + (in2.numpy()[2])), tol=tol)
assert_np_equal(outputs.numpy()[3], 2 * (in1.numpy()[3] - (in2.numpy()[3])), tol=tol)
    # note: wp.mod() uses truncated-division (C fmod) semantics, so the
    # result takes the sign of the dividend, unlike Python's % operator
    # or np.mod(); the reference below reproduces that behaviour:
assert_np_equal(
outputs.numpy()[4],
2
* (
(in1.numpy()[4])
- (in2.numpy()[4]) * np.sign(in1.numpy()[4]) * np.floor(np.abs(in1.numpy()[4]) / (in2.numpy()[4]))
),
tol=tol,
)
assert_np_equal(outputs.numpy()[5], 2 * np.minimum(in1.numpy()[5], in2.numpy()[5]), tol=tol)
assert_np_equal(outputs.numpy()[6], 2 * np.maximum(in1.numpy()[6], in2.numpy()[6]), tol=tol)
assert_np_equal(outputs.numpy()[7], 2 * np.floor_divide(in1.numpy()[7], in2.numpy()[7]), tol=tol)
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
if dtype in np_float_types:
for i in range(10):
# multiplication:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(in1.numpy())
expected[0, i] = 2.0 * in2.numpy()[0, i]
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
expected[0, i] = 2.0 * in1.numpy()[0, i]
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
tape.zero()
# division:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(in1.numpy())
expected[1, i] = 2.0 / (in2.numpy()[1, i])
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
# y = x1/x2
# dy/dx2 = -x1/x2^2
expected[1, i] = (-2.0) * (in1.numpy()[1, i] / (in2.numpy()[1, i] ** 2))
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
tape.zero()
# addition:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(in1.numpy())
expected[2, i] = 2.0
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
expected[2, i] = 2.0
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
tape.zero()
# subtraction:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(in1.numpy())
expected[3, i] = 2.0
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
expected[3, i] = -2.0
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
tape.zero()
            # modulus: away from the discontinuities,
# d/dx1( x1 % x2 ) == 1
# d/dx2( x1 % x2 ) == 0
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(in1.numpy())
expected[4, i] = 2.0
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
expected[4, i] = 0.0
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
tape.zero()
# min
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 5, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(in1.numpy())
expected[5, i] = 2.0 if (in1.numpy()[5, i] < in2.numpy()[5, i]) else 0.0
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
expected[5, i] = 2.0 if (in2.numpy()[5, i] < in1.numpy()[5, i]) else 0.0
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
tape.zero()
# max
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 6, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(in1.numpy())
expected[6, i] = 2.0 if (in1.numpy()[6, i] > in2.numpy()[6, i]) else 0.0
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
expected[6, i] = 2.0 if (in2.numpy()[6, i] > in1.numpy()[6, i]) else 0.0
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
tape.zero()
            # floor_divide: the result is piecewise constant, so both gradients are zero
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 7, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(in1.numpy())
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
tape.zero()
def test_special_funcs(test, device, dtype, register_kernels=False):
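    """Test the single-argument special-function builtins (log, exp, trig,
    hyperbolic, sqrt, cbrt, ...) against numpy, including their gradients
    for float types."""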
rng = np.random.default_rng(123)
tol = {
np.float16: 1.0e-2,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
def check_special_funcs(
inputs: wp.array(dtype=wptype, ndim=2),
outputs: wp.array(dtype=wptype, ndim=2),
):
# multiply outputs by 2 so we've got something to backpropagate:
for i in range(10):
outputs[0, i] = wptype(2) * wp.log(inputs[0, i])
outputs[1, i] = wptype(2) * wp.log2(inputs[1, i])
outputs[2, i] = wptype(2) * wp.log10(inputs[2, i])
outputs[3, i] = wptype(2) * wp.exp(inputs[3, i])
outputs[4, i] = wptype(2) * wp.atan(inputs[4, i])
outputs[5, i] = wptype(2) * wp.sin(inputs[5, i])
outputs[6, i] = wptype(2) * wp.cos(inputs[6, i])
outputs[7, i] = wptype(2) * wp.sqrt(inputs[7, i])
outputs[8, i] = wptype(2) * wp.tan(inputs[8, i])
outputs[9, i] = wptype(2) * wp.sinh(inputs[9, i])
outputs[10, i] = wptype(2) * wp.cosh(inputs[10, i])
outputs[11, i] = wptype(2) * wp.tanh(inputs[11, i])
outputs[12, i] = wptype(2) * wp.acos(inputs[12, i])
outputs[13, i] = wptype(2) * wp.asin(inputs[13, i])
outputs[14, i] = wptype(2) * wp.cbrt(inputs[14, i])
kernel = getkernel(check_special_funcs, suffix=dtype.__name__)
output_select_kernel = get_select_kernel2(wptype)
if register_kernels:
return
invals = rng.normal(size=(15, 10)).astype(dtype)
invals[[0, 1, 2, 7, 14]] = 0.1 + np.abs(invals[[0, 1, 2, 7, 14]])
invals[12] = np.clip(invals[12], -0.9, 0.9)
invals[13] = np.clip(invals[13], -0.9, 0.9)
inputs = wp.array(invals, dtype=wptype, requires_grad=True, device=device)
outputs = wp.zeros_like(inputs)
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
assert_np_equal(outputs.numpy()[0], 2 * np.log(inputs.numpy()[0]), tol=tol)
assert_np_equal(outputs.numpy()[1], 2 * np.log2(inputs.numpy()[1]), tol=tol)
assert_np_equal(outputs.numpy()[2], 2 * np.log10(inputs.numpy()[2]), tol=tol)
assert_np_equal(outputs.numpy()[3], 2 * np.exp(inputs.numpy()[3]), tol=tol)
assert_np_equal(outputs.numpy()[4], 2 * np.arctan(inputs.numpy()[4]), tol=tol)
assert_np_equal(outputs.numpy()[5], 2 * np.sin(inputs.numpy()[5]), tol=tol)
assert_np_equal(outputs.numpy()[6], 2 * np.cos(inputs.numpy()[6]), tol=tol)
assert_np_equal(outputs.numpy()[7], 2 * np.sqrt(inputs.numpy()[7]), tol=tol)
assert_np_equal(outputs.numpy()[8], 2 * np.tan(inputs.numpy()[8]), tol=tol)
assert_np_equal(outputs.numpy()[9], 2 * np.sinh(inputs.numpy()[9]), tol=tol)
assert_np_equal(outputs.numpy()[10], 2 * np.cosh(inputs.numpy()[10]), tol=tol)
assert_np_equal(outputs.numpy()[11], 2 * np.tanh(inputs.numpy()[11]), tol=tol)
assert_np_equal(outputs.numpy()[12], 2 * np.arccos(inputs.numpy()[12]), tol=tol)
assert_np_equal(outputs.numpy()[13], 2 * np.arcsin(inputs.numpy()[13]), tol=tol)
assert_np_equal(outputs.numpy()[14], 2 * np.cbrt(inputs.numpy()[14]), tol=tol)
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
if dtype in np_float_types:
for i in range(10):
# log:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[0, i] = 2.0 / inputs.numpy()[0, i]
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# log2:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[1, i] = 2.0 / (inputs.numpy()[1, i] * np.log(2.0))
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# log10:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[2, i] = 2.0 / (inputs.numpy()[2, i] * np.log(10.0))
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# exp:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[3, i] = outputs.numpy()[3, i]
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# arctan:
            # d/dx atan(x) = 1 / (1 + x^2); an earlier warp autodiff formula
            # mistakenly used (1 + x^2) instead of its reciprocal.
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[4, i] = 2.0 / (inputs.numpy()[4, i] ** 2 + 1)
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# sin:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 5, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[5, i] = np.cos(inputs.numpy()[5, i]) * 2
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# cos:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 6, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[6, i] = -np.sin(inputs.numpy()[6, i]) * 2.0
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# sqrt:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 7, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[7, i] = 1.0 / (np.sqrt(inputs.numpy()[7, i]))
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# tan:
            # an earlier warp autodiff formula zeroed the gradient whenever
            # cos(x) <= 0; the correct guard is cos(x) != 0.
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 8, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[8, i] = 2.0 / (np.cos(inputs.numpy()[8, i]) ** 2)
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=200 * tol)
tape.zero()
# sinh:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 9, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[9, i] = 2.0 * np.cosh(inputs.numpy()[9, i])
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# cosh:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 10, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[10, i] = 2.0 * np.sinh(inputs.numpy()[10, i])
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# tanh:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 11, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[11, i] = 2.0 / (np.cosh(inputs.numpy()[11, i]) ** 2)
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# arccos:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 12, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[12, i] = -2.0 / np.sqrt(1 - inputs.numpy()[12, i] ** 2)
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
# arcsin:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 13, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
expected[13, i] = 2.0 / np.sqrt(1 - inputs.numpy()[13, i] ** 2)
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=6 * tol)
tape.zero()
# cbrt:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 14, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(inputs.numpy())
cbrt = np.cbrt(inputs.numpy()[14, i], dtype=np.dtype(dtype))
expected[14, i] = (2.0 / 3.0) * (1.0 / (cbrt * cbrt))
assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
tape.zero()
def test_special_funcs_2arg(test, device, dtype, register_kernels=False):
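    """Test wp.pow() and wp.atan2() against numpy, including their gradients."""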
rng = np.random.default_rng(123)
tol = {
np.float16: 1.0e-2,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
def check_special_funcs_2arg(
in1: wp.array(dtype=wptype, ndim=2),
in2: wp.array(dtype=wptype, ndim=2),
outputs: wp.array(dtype=wptype, ndim=2),
):
# multiply outputs by 2 so we've got something to backpropagate:
for i in range(10):
outputs[0, i] = wptype(2) * wp.pow(in1[0, i], in2[0, i])
outputs[1, i] = wptype(2) * wp.atan2(in1[1, i], in2[1, i])
kernel = getkernel(check_special_funcs_2arg, suffix=dtype.__name__)
output_select_kernel = get_select_kernel2(wptype)
if register_kernels:
return
in1 = wp.array(np.abs(randvals(rng, [2, 10], dtype)), dtype=wptype, requires_grad=True, device=device)
in2 = wp.array(randvals(rng, [2, 10], dtype), dtype=wptype, requires_grad=True, device=device)
outputs = wp.zeros_like(in1)
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
assert_np_equal(outputs.numpy()[0], 2.0 * np.power(in1.numpy()[0], in2.numpy()[0]), tol=tol)
assert_np_equal(outputs.numpy()[1], 2.0 * np.arctan2(in1.numpy()[1], in2.numpy()[1]), tol=tol)
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
if dtype in np_float_types:
for i in range(10):
# pow:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(in1.numpy())
expected[0, i] = 2.0 * in2.numpy()[0, i] * np.power(in1.numpy()[0, i], in2.numpy()[0, i] - 1)
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=5 * tol)
expected[0, i] = 2.0 * np.power(in1.numpy()[0, i], in2.numpy()[0, i]) * np.log(in1.numpy()[0, i])
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
tape.zero()
# atan2:
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
tape.backward(loss=out)
expected = np.zeros_like(in1.numpy())
expected[1, i] = 2.0 * in2.numpy()[1, i] / (in1.numpy()[1, i] ** 2 + in2.numpy()[1, i] ** 2)
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
expected[1, i] = -2.0 * in1.numpy()[1, i] / (in1.numpy()[1, i] ** 2 + in2.numpy()[1, i] ** 2)
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
tape.zero()
def test_float_to_int(test, device, dtype, register_kernels=False):
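    """Test round, rint, trunc, floor, ceil and frac against numpy."""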
rng = np.random.default_rng(123)
tol = {
np.float16: 5.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
def check_float_to_int(
inputs: wp.array(dtype=wptype, ndim=2),
outputs: wp.array(dtype=wptype, ndim=2),
):
for i in range(10):
outputs[0, i] = wp.round(inputs[0, i])
outputs[1, i] = wp.rint(inputs[1, i])
outputs[2, i] = wp.trunc(inputs[2, i])
outputs[3, i] = wp.floor(inputs[3, i])
outputs[4, i] = wp.ceil(inputs[4, i])
outputs[5, i] = wp.frac(inputs[5, i])
kernel = getkernel(check_float_to_int, suffix=dtype.__name__)
output_select_kernel = get_select_kernel2(wptype)
if register_kernels:
return
inputs = wp.array(rng.standard_normal(size=(6, 10)).astype(dtype), dtype=wptype, requires_grad=True, device=device)
outputs = wp.zeros_like(inputs)
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
assert_np_equal(outputs.numpy()[0], np.round(inputs.numpy()[0]))
assert_np_equal(outputs.numpy()[1], np.rint(inputs.numpy()[1]))
assert_np_equal(outputs.numpy()[2], np.trunc(inputs.numpy()[2]))
assert_np_equal(outputs.numpy()[3], np.floor(inputs.numpy()[3]))
assert_np_equal(outputs.numpy()[4], np.ceil(inputs.numpy()[4]))
assert_np_equal(outputs.numpy()[5], np.modf(inputs.numpy()[5])[0])
    # gradients of the first five outputs (round, rint, trunc, floor, ceil)
    # should be zero, as those functions are piecewise constant; frac is
    # excluded below since its derivative is 1 almost everywhere:
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
for i in range(10):
for j in range(5):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, j, i], outputs=[out], device=device)
tape.backward(loss=out)
assert_np_equal(tape.gradients[inputs].numpy(), np.zeros_like(inputs.numpy()), tol=tol)
tape.zero()
def test_interp(test, device, dtype, register_kernels=False):
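    """Test wp.smoothstep() and wp.lerp() against numpy references,
    including their gradients for float types."""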
rng = np.random.default_rng(123)
tol = {
np.float16: 1.0e-2,
np.float32: 5.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
def check_interp(
in1: wp.array(dtype=wptype, ndim=2),
in2: wp.array(dtype=wptype, ndim=2),
in3: wp.array(dtype=wptype, ndim=2),
outputs: wp.array(dtype=wptype, ndim=2),
):
# multiply outputs by 2 so we've got something to backpropagate:
for i in range(10):
outputs[0, i] = wptype(2) * wp.smoothstep(in1[0, i], in2[0, i], in3[0, i])
outputs[1, i] = wptype(2) * wp.lerp(in1[1, i], in2[1, i], in3[1, i])
kernel = getkernel(check_interp, suffix=dtype.__name__)
output_select_kernel = get_select_kernel2(wptype)
if register_kernels:
return
e0 = randvals(rng, [2, 10], dtype)
e1 = e0 + randvals(rng, [2, 10], dtype) + 0.1
in1 = wp.array(e0, dtype=wptype, requires_grad=True, device=device)
in2 = wp.array(e1, dtype=wptype, requires_grad=True, device=device)
in3 = wp.array(randvals(rng, [2, 10], dtype), dtype=wptype, requires_grad=True, device=device)
outputs = wp.zeros_like(in1)
wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
edge0 = in1.numpy()[0]
edge1 = in2.numpy()[0]
t_smoothstep = in3.numpy()[0]
x = np.clip((t_smoothstep - edge0) / (edge1 - edge0), 0, 1)
smoothstep_expected = 2.0 * x * x * (3 - 2 * x)
assert_np_equal(outputs.numpy()[0], smoothstep_expected, tol=tol)
a = in1.numpy()[1]
b = in2.numpy()[1]
t = in3.numpy()[1]
assert_np_equal(outputs.numpy()[1], 2.0 * (a * (1 - t) + b * t), tol=tol)
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
if dtype in np_float_types:
for i in range(10):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
tape.backward(loss=out)
# e0 = in1
# e1 = in2
# t = in3
# x = clamp((t - e0) / (e1 - e0), 0,1)
# dx/dt = 1 / (e1 - e0) if e0 < t < e1 else 0
# y = x * x * (3 - 2 * x)
# y = 3 * x * x - 2 * x * x * x
# dy/dx = 6 * ( x - x^2 )
dydx = 6 * x * (1 - x)
# dy/in1 = dy/dx dx/de0 de0/din1
dxde0 = (t_smoothstep - edge1) / ((edge1 - edge0) ** 2)
dxde0[x == 0] = 0
dxde0[x == 1] = 0
expected_grads = np.zeros_like(in1.numpy())
expected_grads[0, i] = 2.0 * dydx[i] * dxde0[i]
assert_np_equal(tape.gradients[in1].numpy(), expected_grads, tol=tol)
# dy/in2 = dy/dx dx/de1 de1/din2
dxde1 = (edge0 - t_smoothstep) / ((edge1 - edge0) ** 2)
dxde1[x == 0] = 0
dxde1[x == 1] = 0
expected_grads = np.zeros_like(in1.numpy())
expected_grads[0, i] = 2.0 * dydx[i] * dxde1[i]
assert_np_equal(tape.gradients[in2].numpy(), expected_grads, tol=tol)
# dy/in3 = dy/dx dx/dt dt/din3
dxdt = 1.0 / (edge1 - edge0)
dxdt[x == 0] = 0
dxdt[x == 1] = 0
expected_grads = np.zeros_like(in1.numpy())
expected_grads[0, i] = 2.0 * dydx[i] * dxdt[i]
assert_np_equal(tape.gradients[in3].numpy(), expected_grads, tol=tol)
tape.zero()
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
tape.backward(loss=out)
# y = a*(1-t) + b*t
# a = in1
# b = in2
# t = in3
# y = in1*( 1 - in3 ) + in2*in3
# dy/din1 = (1-in3)
expected_grads = np.zeros_like(in1.numpy())
expected_grads[1, i] = 2.0 * (1 - in3.numpy()[1, i])
assert_np_equal(tape.gradients[in1].numpy(), expected_grads, tol=tol)
# dy/din2 = in3
expected_grads = np.zeros_like(in1.numpy())
expected_grads[1, i] = 2.0 * in3.numpy()[1, i]
assert_np_equal(tape.gradients[in2].numpy(), expected_grads, tol=tol)
            # dy/din3 = in2 - in1
expected_grads = np.zeros_like(in1.numpy())
expected_grads[1, i] = 2.0 * (in2.numpy()[1, i] - in1.numpy()[1, i])
assert_np_equal(tape.gradients[in3].numpy(), expected_grads, tol=tol)
tape.zero()
def test_clamp(test, device, dtype, register_kernels=False):
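    """Test wp.clamp() against np.clip(), including the piecewise gradients."""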
rng = np.random.default_rng(123)
tol = {
np.float16: 5.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-6,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
def check_clamp(
in1: wp.array(dtype=wptype),
in2: wp.array(dtype=wptype),
in3: wp.array(dtype=wptype),
outputs: wp.array(dtype=wptype),
):
for i in range(100):
# multiply output by 2 so we've got something to backpropagate:
outputs[i] = wptype(2) * wp.clamp(in1[i], in2[i], in3[i])
kernel = getkernel(check_clamp, suffix=dtype.__name__)
output_select_kernel = get_select_kernel(wptype)
if register_kernels:
return
in1 = wp.array(randvals(rng, [100], dtype), dtype=wptype, requires_grad=True, device=device)
starts = randvals(rng, [100], dtype)
diffs = np.abs(randvals(rng, [100], dtype))
in2 = wp.array(starts, dtype=wptype, requires_grad=True, device=device)
in3 = wp.array(starts + diffs, dtype=wptype, requires_grad=True, device=device)
outputs = wp.zeros_like(in1)
wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
assert_np_equal(2 * np.clip(in1.numpy(), in2.numpy(), in3.numpy()), outputs.numpy(), tol=tol)
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
if dtype in np_float_types:
for i in range(100):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[out], device=device)
tape.backward(loss=out)
t = in1.numpy()[i]
lower = in2.numpy()[i]
upper = in3.numpy()[i]
expected = np.zeros_like(in1.numpy())
if t < lower:
expected[i] = 2.0
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
expected[i] = 0.0
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
elif t > upper:
expected[i] = 2.0
assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
expected[i] = 0.0
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
else:
expected[i] = 2.0
assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
expected[i] = 0.0
assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
tape.zero()
devices = get_test_devices()
class TestArithmetic(unittest.TestCase):
pass
# these unary ops only make sense for signed values:
for dtype in np_signed_int_types + np_float_types:
add_function_test_register_kernel(
TestArithmetic, f"test_unary_ops_{dtype.__name__}", test_unary_ops, devices=devices, dtype=dtype
)
for dtype in np_float_types:
add_function_test_register_kernel(
TestArithmetic, f"test_special_funcs_{dtype.__name__}", test_special_funcs, devices=devices, dtype=dtype
)
add_function_test_register_kernel(
TestArithmetic,
f"test_special_funcs_2arg_{dtype.__name__}",
test_special_funcs_2arg,
devices=devices,
dtype=dtype,
)
add_function_test_register_kernel(
TestArithmetic, f"test_interp_{dtype.__name__}", test_interp, devices=devices, dtype=dtype
)
add_function_test_register_kernel(
TestArithmetic, f"test_float_to_int_{dtype.__name__}", test_float_to_int, devices=devices, dtype=dtype
)
for dtype in np_scalar_types:
add_function_test_register_kernel(
TestArithmetic, f"test_clamp_{dtype.__name__}", test_clamp, devices=devices, dtype=dtype
)
add_function_test_register_kernel(
TestArithmetic, f"test_nonzero_{dtype.__name__}", test_nonzero, devices=devices, dtype=dtype
)
add_function_test(TestArithmetic, f"test_arrays_{dtype.__name__}", test_arrays, devices=devices, dtype=dtype)
add_function_test_register_kernel(
TestArithmetic, f"test_binary_ops_{dtype.__name__}", test_binary_ops, devices=devices, dtype=dtype
)
if __name__ == "__main__":
wp.build.clear_kernel_cache()
unittest.main(verbosity=2, failfast=False)