# Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import unittest
import numpy as np
import warp as wp
from warp.tests.unittest_utils import *
wp.init()
np_signed_int_types = [
np.int8,
np.int16,
np.int32,
np.int64,
np.byte,
]
np_float_types = [np.float16, np.float32, np.float64]
def randvals(rng, shape, dtype):
if dtype in np_float_types:
return rng.standard_normal(size=shape).astype(dtype)
elif dtype in [np.int8, np.uint8, np.byte, np.ubyte]:
return rng.integers(1, high=3, size=shape, dtype=dtype)
return rng.integers(1, high=5, size=shape, dtype=dtype)
kernel_cache = dict()
def getkernel(func, suffix=""):
key = func.__name__ + "_" + suffix
if key not in kernel_cache:
kernel_cache[key] = wp.Kernel(func=func, key=key)
return kernel_cache[key]
def get_select_kernel(dtype):
def output_select_kernel_fn(
input: wp.array(dtype=dtype),
index: int,
out: wp.array(dtype=dtype),
):
out[0] = input[index]
return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
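# A minimal usage sketch for the two helpers above (illustrative only, not
# exercised by the test suite): build a dtype-specialized select kernel and
# read a single component back from device memory.
def _example_select_usage(device=None):
    select_kernel = get_select_kernel(wp.float32)
    values = wp.array(np.arange(4, dtype=np.float32), dtype=wp.float32, device=device)
    out = wp.zeros(1, dtype=wp.float32, device=device)
    # copies values[2] into out[0]
    wp.launch(select_kernel, dim=1, inputs=[values, 2], outputs=[out], device=device)
    return out.numpy()[0]  # == 2.0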
def test_anon_constructor_error_shape_keyword_missing(test, device):
@wp.kernel
def kernel():
wp.matrix(1.0, 2.0, 3.0)
with test.assertRaisesRegex(
RuntimeError,
r"shape keyword must be specified when calling matrix\(\) function$",
):
wp.launch(
kernel,
dim=1,
inputs=[],
device=device,
)
def test_anon_constructor_error_dtype_keyword_missing(test, device):
@wp.kernel
def kernel():
wp.matrix(shape=(3, 3))
with test.assertRaisesRegex(
RuntimeError,
r"matrix\(\) must have dtype as a keyword argument if it has no " r"positional arguments$",
):
wp.launch(
kernel,
dim=1,
inputs=[],
device=device,
)
def test_anon_constructor_error_shape_mismatch(test, device):
@wp.kernel
def kernel():
wp.matrix(
wp.matrix(shape=(1, 2), dtype=float),
shape=(3, 4),
dtype=float,
)
with test.assertRaisesRegex(
RuntimeError,
r"Incompatible matrix sizes for casting copy constructor, " r"\(3, 4\) vs \(1, 2\)$",
):
wp.launch(
kernel,
dim=1,
inputs=[],
device=device,
)
def test_anon_constructor_error_invalid_arg_count(test, device):
@wp.kernel
def kernel():
wp.matrix(1.0, 2.0, 3.0, shape=(2, 2), dtype=float)
with test.assertRaisesRegex(
RuntimeError,
r"Wrong number of arguments for matrix\(\) function, must initialize "
r"with either a scalar value, or m\*n values$",
):
wp.launch(
kernel,
dim=1,
inputs=[],
device=device,
)
def test_tpl_constructor_error_incompatible_sizes(test, device):
@wp.kernel
def kernel():
wp.mat33(wp.mat22(1.0, 2.0, 3.0, 4.0))
with test.assertRaisesRegex(
RuntimeError,
r"Incompatible matrix sizes for casting copy constructor, " r"\(3, 3\) vs \(2, 2\)$",
):
wp.launch(
kernel,
dim=1,
inputs=[],
device=device,
)
def test_tpl_constructor_error_invalid_scalar_type(test, device):
@wp.kernel
def kernel():
wp.mat22(1, 2, 3, 4)
with test.assertRaisesRegex(
RuntimeError,
r"Wrong scalar type for mat 2,2,<class 'warp.types.float32'> constructor$",
):
wp.launch(
kernel,
dim=1,
inputs=[],
device=device,
)
def test_tpl_constructor_error_invalid_vector_count(test, device):
@wp.kernel
def kernel():
wp.mat22(wp.vec3(1.0, 2.0, 3.0))
with test.assertRaisesRegex(
RuntimeError,
r"Wrong number of vectors when attempting to construct a matrix " r"with column vectors$",
):
wp.launch(
kernel,
dim=1,
inputs=[],
device=device,
)
def test_tpl_constructor_error_invalid_vector_shape(test, device):
@wp.kernel
def kernel():
wp.mat22(wp.vec3(1.0, 2.0, 3.0), wp.vec3(4.0, 5.0, 6.0))
with test.assertRaisesRegex(
RuntimeError,
r"Wrong vector row count when attempting to construct a matrix " r"with column vectors$",
):
wp.launch(
kernel,
dim=1,
inputs=[],
device=device,
)
def test_tpl_constructor_error_invalid_arg_count(test, device):
@wp.kernel
def kernel():
wp.mat22(1.0, 2.0, 3.0)
with test.assertRaisesRegex(
RuntimeError,
r"Wrong number of scalars when attempting to construct a matrix " r"from a list of components$",
):
wp.launch(
kernel,
dim=1,
inputs=[],
device=device,
)
def test_tpl_ops_with_anon(test, device):
mat22f = wp.mat((2, 2), dtype=float)
m = wp.mat22f(1.0, 2.0, 3.0, 4.0)
m += mat22f(2.0, 3.0, 4.0, 5.0)
m -= mat22f(3.0, 4.0, 5.0, 6.0)
test.assertSequenceEqual(m, ((0.0, 1.0), (2.0, 3.0)))
m = mat22f(1.0, 2.0, 3.0, 4.0)
m += wp.mat22f(2.0, 3.0, 4.0, 5.0)
m -= wp.mat22f(3.0, 4.0, 5.0, 6.0)
test.assertSequenceEqual(m, ((0.0, 1.0), (2.0, 3.0)))
def test_py_arithmetic_ops(test, device, dtype):
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
def make_mat(*args):
if wptype in wp.types.int_types:
# Cast to the correct integer type to simulate wrapping.
return tuple(tuple(wptype._type_(x).value for x in row) for row in args)
return args
def make_vec(*args):
if wptype in wp.types.int_types:
# Cast to the correct integer type to simulate wrapping.
return tuple(wptype._type_(x).value for x in args)
return args
mat_cls = wp.mat((3, 3), wptype)
vec_cls = wp.vec(3, wptype)
m = mat_cls(((-1, 2, 3), (4, -5, 6), (7, 8, -9)))
test.assertSequenceEqual(+m, make_mat((-1, 2, 3), (4, -5, 6), (7, 8, -9)))
test.assertSequenceEqual(-m, make_mat((1, -2, -3), (-4, 5, -6), (-7, -8, 9)))
test.assertSequenceEqual(m + mat_cls((5, 5, 5) * 3), make_mat((4, 7, 8), (9, 0, 11), (12, 13, -4)))
test.assertSequenceEqual(m - mat_cls((5, 5, 5) * 3), make_mat((-6, -3, -2), (-1, -10, 1), (2, 3, -14)))
test.assertSequenceEqual(m * vec_cls(5, 5, 5), make_vec(20, 25, 30))
test.assertSequenceEqual(m @ vec_cls(5, 5, 5), make_vec(20, 25, 30))
test.assertSequenceEqual(vec_cls(5, 5, 5) * m, make_vec(50, 25, 0))
test.assertSequenceEqual(vec_cls(5, 5, 5) @ m, make_vec(50, 25, 0))
m = mat_cls(((2, 4, 6), (8, 10, 12), (14, 16, 18)))
test.assertSequenceEqual(m * wptype(2), make_mat((4, 8, 12), (16, 20, 24), (28, 32, 36)))
test.assertSequenceEqual(wptype(2) * m, make_mat((4, 8, 12), (16, 20, 24), (28, 32, 36)))
test.assertSequenceEqual(m / wptype(2), make_mat((1, 2, 3), (4, 5, 6), (7, 8, 9)))
test.assertSequenceEqual(wptype(5040) / m, make_mat((2520, 1260, 840), (630, 504, 420), (360, 315, 280)))
test.assertSequenceEqual(m * vec_cls(5, 5, 5), make_vec(60, 150, 240))
test.assertSequenceEqual(m @ vec_cls(5, 5, 5), make_vec(60, 150, 240))
test.assertSequenceEqual(vec_cls(5, 5, 5) * m, make_vec(120, 150, 180))
test.assertSequenceEqual(vec_cls(5, 5, 5) @ m, make_vec(120, 150, 180))
def test_quat_constructor(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 1.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
mat44 = wp.types.matrix(shape=(4, 4), dtype=wptype)
vec4 = wp.types.vector(length=4, dtype=wptype)
vec3 = wp.types.vector(length=3, dtype=wptype)
quat = wp.types.quaternion(dtype=wptype)
output_select_kernel = get_select_kernel(wptype)
def check_mat_quat_constructor(
p: wp.array(dtype=vec3),
r: wp.array(dtype=quat),
s: wp.array(dtype=vec3),
outcomponents: wp.array(dtype=wptype),
outcomponents_alt: wp.array(dtype=wptype),
):
m = mat44(p[0], r[0], s[0])
R = wp.transpose(wp.quat_to_matrix(r[0]))
c0 = s[0][0] * R[0]
c1 = s[0][1] * R[1]
c2 = s[0][2] * R[2]
m_alt = mat44(
vec4(c0[0], c0[1], c0[2], wptype(0.0)),
vec4(c1[0], c1[1], c1[2], wptype(0.0)),
vec4(c2[0], c2[1], c2[2], wptype(0.0)),
vec4(p[0][0], p[0][1], p[0][2], wptype(1.0)),
)
idx = 0
for i in range(4):
for j in range(4):
outcomponents[idx] = m[i, j]
outcomponents_alt[idx] = m_alt[i, j]
idx = idx + 1
kernel = getkernel(check_mat_quat_constructor, suffix=dtype.__name__)
if register_kernels:
return
# translation:
p = wp.array(rng.standard_normal(size=(1, 3)).astype(dtype), dtype=vec3, requires_grad=True, device=device)
# generate a normalized quaternion for the rotation:
r = rng.standard_normal(size=(1, 4))
r /= np.linalg.norm(r)
r = wp.array(r.astype(dtype), dtype=quat, requires_grad=True, device=device)
# scale:
s = wp.array(rng.standard_normal(size=(1, 3)).astype(dtype), dtype=vec3, requires_grad=True, device=device)
    # Generate the matrix using the constructor, then build it manually, and
    # make sure the values/gradients match:
outcomponents = wp.zeros(4 * 4, dtype=wptype, requires_grad=True, device=device)
outcomponents_alt = wp.zeros(4 * 4, dtype=wptype, requires_grad=True, device=device)
wp.launch(kernel, dim=1, inputs=[p, r, s], outputs=[outcomponents, outcomponents_alt], device=device)
assert_np_equal(outcomponents.numpy(), outcomponents_alt.numpy(), tol=1.0e-6)
idx = 0
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
out_alt = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
for i in range(4):
for j in range(4):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[p, r, s], outputs=[outcomponents, outcomponents_alt], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
wp.launch(
output_select_kernel, dim=1, inputs=[outcomponents_alt, idx], outputs=[out_alt], device=device
)
tape.backward(loss=out)
p_grad = 1.0 * tape.gradients[p].numpy()[0]
r_grad = 1.0 * tape.gradients[r].numpy()[0]
s_grad = 1.0 * tape.gradients[s].numpy()[0]
tape.zero()
tape.backward(loss=out_alt)
p_grad_alt = 1.0 * tape.gradients[p].numpy()[0]
r_grad_alt = 1.0 * tape.gradients[r].numpy()[0]
s_grad_alt = 1.0 * tape.gradients[s].numpy()[0]
tape.zero()
assert_np_equal(p_grad, p_grad_alt, tol=tol)
assert_np_equal(r_grad, r_grad_alt, tol=tol)
assert_np_equal(s_grad, s_grad_alt, tol=tol)
idx = idx + 1
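# The kernel above builds the same TRS matrix twice: once via the
# mat44(p, q, s) constructor and once column by column (scaled rotation axes,
# then a translation column with homogeneous 1). A NumPy sketch of that
# construction, assuming Warp's (x, y, z, w) quaternion layout (illustrative
# only, not used by the tests):
def _np_trs_matrix(p, q, s):
    x, y, z, w = q
    r = np.array(
        [
            [1.0 - 2.0 * (y * y + z * z), 2.0 * (x * y - z * w), 2.0 * (x * z + y * w)],
            [2.0 * (x * y + z * w), 1.0 - 2.0 * (x * x + z * z), 2.0 * (y * z - x * w)],
            [2.0 * (x * z - y * w), 2.0 * (y * z + x * w), 1.0 - 2.0 * (x * x + y * y)],
        ]
    )
    m = np.eye(4)
    m[:3, :3] = r * s  # broadcasting scales column j by s[j]
    m[:3, 3] = p
    return m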
def test_negation(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 1.0e-2,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
mat22 = wp.types.matrix(shape=(2, 2), dtype=wptype)
mat33 = wp.types.matrix(shape=(3, 3), dtype=wptype)
mat44 = wp.types.matrix(shape=(4, 4), dtype=wptype)
mat55 = wp.types.matrix(shape=(5, 5), dtype=wptype)
output_select_kernel = get_select_kernel(wptype)
def check_mat_negation(
m2: wp.array(dtype=mat22),
m3: wp.array(dtype=mat33),
m4: wp.array(dtype=mat44),
m5: wp.array(dtype=mat55),
outcomponents: wp.array(dtype=wptype),
):
mat2 = -m2[0]
mat3 = -m3[0]
mat4 = -m4[0]
mat5 = -m5[0]
# multiply outputs by 2 so we've got something to backpropagate:
idx = 0
for i in range(2):
for j in range(2):
outcomponents[idx] = wptype(2) * mat2[i, j]
idx = idx + 1
for i in range(3):
for j in range(3):
outcomponents[idx] = wptype(2) * mat3[i, j]
idx = idx + 1
for i in range(4):
for j in range(4):
outcomponents[idx] = wptype(2) * mat4[i, j]
idx = idx + 1
for i in range(5):
for j in range(5):
outcomponents[idx] = wptype(2) * mat5[i, j]
idx = idx + 1
kernel = getkernel(check_mat_negation, suffix=dtype.__name__)
if register_kernels:
return
m2 = wp.array(randvals(rng, [1, 2, 2], dtype), dtype=mat22, requires_grad=True, device=device)
m3 = wp.array(randvals(rng, [1, 3, 3], dtype), dtype=mat33, requires_grad=True, device=device)
m4 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=True, device=device)
m5 = wp.array(randvals(rng, [1, 5, 5], dtype), dtype=mat55, requires_grad=True, device=device)
outcomponents = wp.zeros(2 * 2 + 3 * 3 + 4 * 4 + 5 * 5, dtype=wptype, requires_grad=True, device=device)
wp.launch(kernel, dim=1, inputs=[m2, m3, m4, m5], outputs=[outcomponents], device=device)
assert_np_equal(outcomponents.numpy()[:4], -2 * m2.numpy().reshape(-1), tol=tol)
assert_np_equal(outcomponents.numpy()[4:13], -2 * m3.numpy().reshape(-1), tol=tol)
assert_np_equal(outcomponents.numpy()[13:29], -2 * m4.numpy().reshape(-1), tol=tol)
assert_np_equal(outcomponents.numpy()[29:54], -2 * m5.numpy().reshape(-1), tol=tol)
if dtype in np_float_types:
idx = 0
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
for dim, input in [(2, m2), (3, m3), (4, m4), (5, m5)]:
for i in range(dim):
for j in range(dim):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[m2, m3, m4, m5], outputs=[outcomponents], device=device)
wp.launch(
output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device
)
tape.backward(loss=out)
expectedresult = np.zeros((dim, dim), dtype=dtype)
expectedresult[i, j] = -2
assert_np_equal(tape.gradients[input].numpy()[0], expectedresult)
tape.zero()
idx = idx + 1
def test_subtraction(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 5.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
mat22 = wp.types.matrix(shape=(2, 2), dtype=wptype)
mat33 = wp.types.matrix(shape=(3, 3), dtype=wptype)
mat44 = wp.types.matrix(shape=(4, 4), dtype=wptype)
mat55 = wp.types.matrix(shape=(5, 5), dtype=wptype)
output_select_kernel = get_select_kernel(wptype)
def check_mat_sub(
s2: wp.array(dtype=mat22),
s3: wp.array(dtype=mat33),
s4: wp.array(dtype=mat44),
s5: wp.array(dtype=mat55),
v2: wp.array(dtype=mat22),
v3: wp.array(dtype=mat33),
v4: wp.array(dtype=mat44),
v5: wp.array(dtype=mat55),
outcomponents: wp.array(dtype=wptype),
):
v2result = v2[0] - s2[0]
v3result = v3[0] - s3[0]
v4result = v4[0] - s4[0]
v5result = v5[0] - s5[0]
# multiply outputs by 2 so we've got something to backpropagate:
idx = 0
for i in range(2):
for j in range(2):
outcomponents[idx] = wptype(2) * v2result[i, j]
idx = idx + 1
for i in range(3):
for j in range(3):
outcomponents[idx] = wptype(2) * v3result[i, j]
idx = idx + 1
for i in range(4):
for j in range(4):
outcomponents[idx] = wptype(2) * v4result[i, j]
idx = idx + 1
for i in range(5):
for j in range(5):
outcomponents[idx] = wptype(2) * v5result[i, j]
idx = idx + 1
kernel = getkernel(check_mat_sub, suffix=dtype.__name__)
if register_kernels:
return
s2 = wp.array(randvals(rng, [1, 2, 2], dtype), dtype=mat22, requires_grad=True, device=device)
s3 = wp.array(randvals(rng, [1, 3, 3], dtype), dtype=mat33, requires_grad=True, device=device)
s4 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=True, device=device)
s5 = wp.array(randvals(rng, [1, 5, 5], dtype), dtype=mat55, requires_grad=True, device=device)
v2 = wp.array(randvals(rng, [1, 2, 2], dtype), dtype=mat22, requires_grad=True, device=device)
v3 = wp.array(randvals(rng, [1, 3, 3], dtype), dtype=mat33, requires_grad=True, device=device)
v4 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=True, device=device)
v5 = wp.array(randvals(rng, [1, 5, 5], dtype), dtype=mat55, requires_grad=True, device=device)
outcomponents = wp.zeros(2 * 2 + 3 * 3 + 4 * 4 + 5 * 5, dtype=wptype, requires_grad=True, device=device)
wp.launch(
kernel,
dim=1,
inputs=[
s2,
s3,
s4,
s5,
v2,
v3,
v4,
v5,
],
outputs=[outcomponents],
device=device,
)
assert_np_equal(outcomponents.numpy()[:4], 2 * (v2.numpy() - s2.numpy()).reshape(-1), tol=tol)
assert_np_equal(outcomponents.numpy()[4:13], 2 * (v3.numpy() - s3.numpy()).reshape(-1), tol=tol)
assert_np_equal(outcomponents.numpy()[13:29], 2 * (v4.numpy() - s4.numpy()).reshape(-1), tol=tol)
assert_np_equal(outcomponents.numpy()[29:54], 2 * (v5.numpy() - s5.numpy()).reshape(-1), tol=10 * tol)
if dtype in np_float_types:
idx = 0
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
for dim, in1, in2 in [(2, s2, v2), (3, s3, v3), (4, s4, v4), (5, s5, v5)]:
for i in range(dim):
for j in range(dim):
tape = wp.Tape()
with tape:
wp.launch(
kernel,
dim=1,
inputs=[
s2,
s3,
s4,
s5,
v2,
v3,
v4,
v5,
],
outputs=[outcomponents],
device=device,
)
wp.launch(
output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device
)
tape.backward(loss=out)
expectedresult = np.zeros((dim, dim), dtype=dtype)
expectedresult[i, j] = 2
assert_np_equal(tape.gradients[in2].numpy()[0], expectedresult, tol=10 * tol)
expectedresult[i, j] = -2
assert_np_equal(tape.gradients[in1].numpy()[0], expectedresult, tol=10 * tol)
tape.zero()
idx = idx + 1
def test_determinant(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 5.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
mat22 = wp.types.matrix(shape=(2, 2), dtype=wptype)
mat33 = wp.types.matrix(shape=(3, 3), dtype=wptype)
mat44 = wp.types.matrix(shape=(4, 4), dtype=wptype)
def check_mat_det(
v2: wp.array(dtype=mat22),
v3: wp.array(dtype=mat33),
v4: wp.array(dtype=mat44),
det2: wp.array(dtype=wptype),
det3: wp.array(dtype=wptype),
det4: wp.array(dtype=wptype),
):
# multiply outputs by 2 so we've got something to backpropagate:
det2[0] = wptype(2) * wp.determinant(v2[0])
det3[0] = wptype(2) * wp.determinant(v3[0])
det4[0] = wptype(2) * wp.determinant(v4[0])
kernel = getkernel(check_mat_det, suffix=dtype.__name__)
if register_kernels:
return
v2 = wp.array(randvals(rng, [1, 2, 2], dtype), dtype=mat22, requires_grad=True, device=device)
v3 = wp.array(randvals(rng, [1, 3, 3], dtype), dtype=mat33, requires_grad=True, device=device)
v4 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=True, device=device)
det2 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
det3 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
det4 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
tape = wp.Tape()
with tape:
wp.launch(
kernel,
dim=1,
inputs=[
v2,
v3,
v4,
],
outputs=[
det2,
det3,
det4,
],
device=device,
)
if dtype in np_float_types:
assert_np_equal(det2.numpy()[0], 2 * np.linalg.det(v2.numpy()[0].astype(np.float64)), tol=100 * tol)
assert_np_equal(det3.numpy()[0], 2 * np.linalg.det(v3.numpy()[0].astype(np.float64)), tol=100 * tol)
assert_np_equal(det4.numpy()[0], 2 * np.linalg.det(v4.numpy()[0].astype(np.float64)), tol=420 * tol)
else:
assert_np_equal(det2.numpy()[0], 2 * np.around(np.linalg.det(v2.numpy()[0])).astype(int))
assert_np_equal(det3.numpy()[0], 2 * np.around(np.linalg.det(v3.numpy()[0])).astype(int))
assert_np_equal(det4.numpy()[0], 2 * np.around(np.linalg.det(v4.numpy()[0])).astype(int))
if dtype in np_float_types:
        # the analytic determinant derivative is awkward to write out, so
        # check against central finite differences instead:
tape.backward(loss=det2)
v2grads = 1.0 * tape.gradients[v2].numpy()[0]
tape.zero()
tape.backward(loss=det3)
v3grads = 1.0 * tape.gradients[v3].numpy()[0]
tape.zero()
tape.backward(loss=det4)
v4grads = 1.0 * tape.gradients[v4].numpy()[0]
tape.zero()
        # finite differences are noisy, hence the large tolerances; float16
        # is especially inaccurate:
dx = 0.01 if dtype == np.float16 else 0.0001
fdtol = 2.0e-1 if dtype == np.float16 else 2.0e-3
for i in range(2):
for j in range(2):
v2test = v2.numpy()
v2test[0, i, j] += dx
wp.launch(
kernel,
dim=1,
inputs=[
wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device),
v3,
v4,
],
outputs=[
det2,
det3,
det4,
],
device=device,
)
dplus = det2.numpy()[0]
v2test[0, i, j] -= 2.0 * dx
wp.launch(
kernel,
dim=1,
inputs=[
wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device),
v3,
v4,
],
outputs=[
det2,
det3,
det4,
],
device=device,
)
dminus = det2.numpy()[0]
assert_np_equal((dplus - dminus) / (2.0 * dx * dplus), v2grads[i, j] / dplus, tol=fdtol)
for i in range(3):
for j in range(3):
v3test = v3.numpy()
v3test[0, i, j] += dx
wp.launch(
kernel,
dim=1,
inputs=[
v2,
wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device),
v4,
],
outputs=[
det2,
det3,
det4,
],
device=device,
)
dplus = det3.numpy()[0]
v3test[0, i, j] -= 2.0 * dx
wp.launch(
kernel,
dim=1,
inputs=[
v2,
wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device),
v4,
],
outputs=[
det2,
det3,
det4,
],
device=device,
)
dminus = det3.numpy()[0]
assert_np_equal((dplus - dminus) / (2.0 * dx * dplus), v3grads[i, j] / dplus, tol=fdtol)
for i in range(4):
for j in range(4):
v4test = v4.numpy()
v4test[0, i, j] += dx
wp.launch(
kernel,
dim=1,
inputs=[
v2,
v3,
wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device),
],
outputs=[
det2,
det3,
det4,
],
device=device,
)
dplus = det4.numpy()[0]
v4test[0, i, j] -= 2.0 * dx
wp.launch(
kernel,
dim=1,
inputs=[
v2,
v3,
wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device),
],
outputs=[
det2,
det3,
det4,
],
device=device,
)
dminus = det4.numpy()[0]
assert_np_equal((dplus - dminus) / (2.0 * dx * dplus), v4grads[i, j] / dplus, tol=fdtol)
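# The gradient checks above use central finite differences. A NumPy-only
# sketch of the same scheme (the helper name is illustrative):
def _central_difference(f, x, i, j, dx=1.0e-4):
    xp = x.copy()
    xp[i, j] += dx
    xm = x.copy()
    xm[i, j] -= dx
    # second-order accurate approximation of df / dx[i, j]
    return (f(xp) - f(xm)) / (2.0 * dx)
# e.g. _central_difference(np.linalg.det, m, 0, 1) approximates the same
# quantity the tape computes for det2/det3/det4 (up to the factor of 2).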
# Unused. Why?
# def test_get_diag(test, device, dtype, register_kernels=False):
#     rng = np.random.default_rng(123)
#     tol = {
#         np.float16: 1.0e-3,
#         np.float32: 1.0e-6,
#         np.float64: 1.0e-8,
#     }.get(dtype, 0)
#
#     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
#     mat55 = wp.types.matrix(shape=(5, 5), dtype=wptype)
#
#     output_select_kernel = get_select_kernel(wptype)
#
#     def check_mat_diag(
#         m55: wp.array(dtype=mat55),
#         outcomponents: wp.array(dtype=wptype),
#     ):
#         # multiply outputs by 2 so we've got something to backpropagate:
#         vec5result = wptype(2) * wp.get_diag(m55[0])
#
#         idx = 0
#         for i in range(5):
#             outcomponents[idx] = vec5result[i]
#             idx = idx + 1
#
#     kernel = getkernel(check_mat_diag, suffix=dtype.__name__)
#
#     if register_kernels:
#         return
#
#     m55 = wp.array(randvals(rng, [1, 5, 5], dtype), dtype=mat55, requires_grad=True, device=device)
#     outcomponents = wp.zeros(5, dtype=wptype, requires_grad=True, device=device)
#     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
#
#     wp.launch(kernel, dim=1, inputs=[m55], outputs=[outcomponents], device=device)
#
#     assert_np_equal(outcomponents.numpy(), 2 * np.diag(m55.numpy()[0]), tol=tol)
#
#     if dtype in np_float_types:
#         idx = 0
#         for i in range(5):
#             tape = wp.Tape()
#             with tape:
#                 wp.launch(kernel, dim=1, inputs=[m55], outputs=[outcomponents], device=device)
#                 wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
#             tape.backward(loss=out)
#             expectedresult = np.zeros((5, 5), dtype=dtype)
#             expectedresult[i, i] = 2
#             assert_np_equal(tape.gradients[m55].numpy()[0], expectedresult, tol=10 * tol)
#             tape.zero()
#
#             idx = idx + 1
def test_inverse(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 5.0e-2,
np.float32: 1.0e-5,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
mat22 = wp.types.matrix(shape=(2, 2), dtype=wptype)
mat33 = wp.types.matrix(shape=(3, 3), dtype=wptype)
mat44 = wp.types.matrix(shape=(4, 4), dtype=wptype)
output_select_kernel = get_select_kernel(wptype)
def check_mat_inverse(
m2: wp.array(dtype=mat22),
m3: wp.array(dtype=mat33),
m4: wp.array(dtype=mat44),
outcomponents: wp.array(dtype=wptype),
):
m2result = wp.inverse(m2[0])
m3result = wp.inverse(m3[0])
m4result = wp.inverse(m4[0])
# multiply outputs by 2 so we've got something to backpropagate:
idx = 0
for i in range(2):
for j in range(2):
outcomponents[idx] = wptype(2) * m2result[i, j]
idx = idx + 1
for i in range(3):
for j in range(3):
outcomponents[idx] = wptype(2) * m3result[i, j]
idx = idx + 1
for i in range(4):
for j in range(4):
outcomponents[idx] = wptype(2) * m4result[i, j]
idx = idx + 1
kernel = getkernel(check_mat_inverse, suffix=dtype.__name__)
if register_kernels:
return
m2 = wp.array(
2 * (randvals(rng, [1, 2, 2], dtype) + 0.2 * np.eye(2)), dtype=mat22, requires_grad=True, device=device
)
m3 = wp.array(
2 * (randvals(rng, [1, 3, 3], dtype) + 0.2 * np.eye(3)), dtype=mat33, requires_grad=True, device=device
)
m4 = wp.array(
2 * (randvals(rng, [1, 4, 4], dtype) + 0.2 * np.eye(4)), dtype=mat44, requires_grad=True, device=device
)
outcomponents = wp.zeros(2 * 2 + 3 * 3 + 4 * 4, dtype=wptype, requires_grad=True, device=device)
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
wp.launch(kernel, dim=1, inputs=[m2, m3, m4], outputs=[outcomponents], device=device)
assert_np_equal(outcomponents.numpy()[:4], 2 * np.linalg.inv(m2.numpy()[0].astype(np.float64)), tol=tol)
assert_np_equal(outcomponents.numpy()[4:13], 2 * np.linalg.inv(m3.numpy()[0].astype(np.float64)), tol=5 * tol)
assert_np_equal(outcomponents.numpy()[13:], 2 * np.linalg.inv(m4.numpy()[0].astype(np.float64)), tol=5 * tol)
if dtype in np_float_types:
# check gradients:
idx = 0
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
for dim, input in [(2, m2), (3, m3), (4, m4)]:
minv = np.linalg.inv(input.numpy()[0].astype(np.float64))
for i in range(dim):
for j in range(dim):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[m2, m3, m4], outputs=[outcomponents], device=device)
wp.launch(
output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device
)
tape.backward(loss=out)
d = np.zeros((dim, dim))
d[j, i] = 2
assert_np_equal(
tape.gradients[input].numpy()[0], -np.matmul(minv, np.matmul(d, minv)).T, tol=10 * tol
)
tape.zero()
idx = idx + 1
    # cross-check the 2x2 result against the closed-form adjugate formula:
m = m2.numpy()[0]
det = m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]
expected = 2 * np.array([[m[1, 1], -m[0, 1]], [-m[1, 0], m[0, 0]]], dtype=dtype) / det
assert_np_equal(expected, outcomponents.numpy()[:4], tol=tol)
# 0,0 component is this:
# 2 * m[1,1] / (m[0,0]*m[1,1] - m[1,0] * m[0,1])
assert_np_equal(2 * m[1, 1] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]), outcomponents.numpy()[0], tol=tol)
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[m2, m3, m4], outputs=[outcomponents], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, 0], outputs=[out], device=device)
if dtype in np_float_types:
tape.backward(loss=out)
g = tape.gradients[m2].numpy()[0]
assert_np_equal(-2 * m[1, 1] * m[1, 1] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[0, 0], tol=tol)
assert_np_equal(2 * m[1, 1] * m[0, 1] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[1, 0], tol=tol)
assert_np_equal(-2 * m[0, 1] * m[1, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[1, 1], tol=tol)
assert_np_equal(2 * m[1, 1] * m[1, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[0, 1], tol=tol)
tape.zero()
# 0,1 component is this:
# -2 * m[0,1] / (m[0,0]*m[1,1] - m[1,0] * m[0,1])
assert_np_equal(-2 * m[0, 1] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]), outcomponents.numpy()[1], tol=tol)
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[m2, m3, m4], outputs=[outcomponents], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, 1], outputs=[out], device=device)
if dtype in np_float_types:
tape.backward(loss=out)
g = tape.gradients[m2].numpy()[0]
assert_np_equal(2 * m[0, 1] * m[1, 1] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[0, 0], tol=tol)
assert_np_equal(-2 * m[0, 1] * m[0, 1] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[1, 0], tol=tol)
assert_np_equal(2 * m[0, 0] * m[0, 1] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[1, 1], tol=tol)
assert_np_equal(-2 * m[1, 1] * m[0, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[0, 1], tol=tol)
tape.zero()
# 1,0 component is this:
# -2 * m[1,0] / (m[0,0]*m[1,1] - m[1,0] * m[0,1])
assert_np_equal(-2 * m[1, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]), outcomponents.numpy()[2], tol=tol)
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[m2, m3, m4], outputs=[outcomponents], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, 2], outputs=[out], device=device)
if dtype in np_float_types:
tape.backward(loss=out)
g = tape.gradients[m2].numpy()[0]
assert_np_equal(2 * m[1, 1] * m[1, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[0, 0], tol=tol)
assert_np_equal(-2 * m[0, 0] * m[1, 1] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[1, 0], tol=tol)
assert_np_equal(2 * m[0, 0] * m[1, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[1, 1], tol=tol)
assert_np_equal(-2 * m[1, 0] * m[1, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[0, 1], tol=tol)
tape.zero()
# 1,1 component is this:
# 2 * m[0,0] / (m[0,0]*m[1,1] - m[1,0] * m[0,1])
assert_np_equal(2 * m[0, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]), outcomponents.numpy()[3], tol=tol)
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[m2, m3, m4], outputs=[outcomponents], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, 3], outputs=[out], device=device)
if dtype in np_float_types:
tape.backward(loss=out)
g = tape.gradients[m2].numpy()[0]
assert_np_equal(-2 * m[0, 1] * m[1, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[0, 0], tol=tol)
assert_np_equal(2 * m[0, 0] * m[0, 1] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[1, 0], tol=tol)
assert_np_equal(2 * m[0, 0] * m[1, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[0, 1], tol=tol)
assert_np_equal(-2 * m[0, 0] * m[0, 0] / (m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) ** 2, g[1, 1], tol=tol)
tape.zero()
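# The inverse gradient check above uses the identity
# d(A^-1) = -A^-1 @ dA @ A^-1. A NumPy sketch of the derivative of the
# inverse with respect to a single entry of A (illustrative names):
def _inverse_entry_derivative(minv, i, j):
    d = np.zeros_like(minv)
    d[i, j] = 1.0
    return -minv @ d @ minv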
def test_svd(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 1.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-6,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
vec3 = wp.types.vector(length=3, dtype=wptype)
mat33 = wp.types.matrix(shape=(3, 3), dtype=wptype)
def check_mat_svd(
m3: wp.array(dtype=mat33),
Uout: wp.array(dtype=mat33),
sigmaout: wp.array(dtype=vec3),
Vout: wp.array(dtype=mat33),
outcomponents: wp.array(dtype=wptype),
):
U = mat33()
sigma = vec3()
V = mat33()
wp.svd3(m3[0], U, sigma, V)
Uout[0] = U
sigmaout[0] = sigma
Vout[0] = V
# multiply outputs by 2 so we've got something to backpropagate:
idx = 0
for i in range(3):
for j in range(3):
outcomponents[idx] = wptype(2) * U[i, j]
idx = idx + 1
for i in range(3):
outcomponents[idx] = wptype(2) * sigma[i]
idx = idx + 1
for i in range(3):
for j in range(3):
outcomponents[idx] = wptype(2) * V[i, j]
idx = idx + 1
kernel = getkernel(check_mat_svd, suffix=dtype.__name__)
output_select_kernel = get_select_kernel(wptype)
if register_kernels:
return
m3 = wp.array(randvals(rng, [1, 3, 3], dtype) + np.eye(3), dtype=mat33, requires_grad=True, device=device)
outcomponents = wp.zeros(2 * 3 * 3 + 3, dtype=wptype, requires_grad=True, device=device)
Uout = wp.zeros(1, dtype=mat33, requires_grad=True, device=device)
sigmaout = wp.zeros(1, dtype=vec3, requires_grad=True, device=device)
Vout = wp.zeros(1, dtype=mat33, requires_grad=True, device=device)
wp.launch(kernel, dim=1, inputs=[m3], outputs=[Uout, sigmaout, Vout, outcomponents], device=device)
Uout_np = Uout.numpy()[0].astype(np.float64)
sigmaout_np = np.diag(sigmaout.numpy()[0].astype(np.float64))
Vout_np = Vout.numpy()[0].astype(np.float64)
assert_np_equal(
np.matmul(Uout_np, np.matmul(sigmaout_np, Vout_np.T)), m3.numpy()[0].astype(np.float64), tol=30 * tol
)
if dtype == np.float16:
        # skip the gradient checks for float16; the rounding errors make
        # them unreliable.
return
# check gradients:
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
idx = 0
for idx in range(3 * 3 + 3 + 3 * 3):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[m3], outputs=[Uout, sigmaout, Vout, outcomponents], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
tape.backward(out)
m3grads = 1.0 * tape.gradients[m3].numpy()[0]
tape.zero()
dx = 0.0001
fdtol = 5.0e-4 if dtype == np.float64 else 2.0e-2
for ii in range(3):
for jj in range(3):
m3test = 1.0 * m3.numpy()
m3test[0, ii, jj] += dx
wp.launch(
kernel,
dim=1,
inputs=[wp.array(m3test, dtype=mat33, device=device)],
outputs=[Uout, sigmaout, Vout, outcomponents],
device=device,
)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
plusval = out.numpy()[0]
m3test = 1.0 * m3.numpy()
m3test[0, ii, jj] -= dx
wp.launch(
kernel,
dim=1,
inputs=[wp.array(m3test, dtype=mat33, device=device)],
outputs=[Uout, sigmaout, Vout, outcomponents],
device=device,
)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
minusval = out.numpy()[0]
assert_np_equal((plusval - minusval) / (2 * dx), m3grads[ii, jj], tol=fdtol)
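# test_svd validates the factorization by reconstruction: U @ diag(sigma) @ V.T
# must reproduce the input. The NumPy analogue (a sketch; note np.linalg.svd
# returns V already transposed, unlike wp.svd3):
def _np_svd_reconstruct(a):
    u, s, vt = np.linalg.svd(a)
    return u @ np.diag(s) @ vt  # equals a up to round-off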
def test_qr(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 2.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-6,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
mat33 = wp.types.matrix(shape=(3, 3), dtype=wptype)
def check_mat_qr(
m3: wp.array(dtype=mat33),
Qout: wp.array(dtype=mat33),
Rout: wp.array(dtype=mat33),
outcomponents: wp.array(dtype=wptype),
):
Q = mat33()
R = mat33()
wp.qr3(m3[0], Q, R)
Qout[0] = Q
Rout[0] = R
# multiply outputs by 2 so we've got something to backpropagate:
idx = 0
for i in range(3):
for j in range(3):
outcomponents[idx] = wptype(2) * Q[i, j]
idx = idx + 1
for i in range(3):
for j in range(3):
outcomponents[idx] = wptype(2) * R[i, j]
idx = idx + 1
kernel = getkernel(check_mat_qr, suffix=dtype.__name__)
output_select_kernel = get_select_kernel(wptype)
if register_kernels:
return
m3 = wp.array(0.5 * (randvals(rng, [1, 3, 3], dtype) + np.eye(3)), dtype=mat33, requires_grad=True, device=device)
outcomponents = wp.zeros(2 * 3 * 3, dtype=wptype, requires_grad=True, device=device)
Qout = wp.zeros(1, dtype=mat33, requires_grad=True, device=device)
Rout = wp.zeros(1, dtype=mat33, requires_grad=True, device=device)
wp.launch(kernel, dim=1, inputs=[m3], outputs=[Qout, Rout, outcomponents], device=device)
Qout_np = Qout.numpy()[0].astype(np.float64)
Rout_np = Rout.numpy()[0].astype(np.float64)
    # check Q is orthogonal and R is upper triangular:
assert_np_equal(np.matmul(Qout_np.T, Qout_np), np.eye(3, dtype=np.float64), tol=tol)
assert_np_equal(Rout_np[1, [0]], np.zeros(1, dtype=np.float64), tol=tol)
assert_np_equal(Rout_np[2, [0, 1]], np.zeros(2, dtype=np.float64), tol=tol)
    # check that Q @ R reconstructs the input:
assert_np_equal(np.matmul(Qout_np, Rout_np), m3.numpy()[0].astype(np.float64), tol=30 * tol)
if dtype == np.float16:
        # skip the gradient checks for float16; the rounding errors make
        # them unreliable.
return
# check gradients:
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
idx = 0
for idx in range(len(outcomponents)):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[m3], outputs=[Qout, Rout, outcomponents], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
tape.backward(out)
m3grads = 1.0 * tape.gradients[m3].numpy()[0]
tape.zero()
dx = 0.0001
fdtol = 5.0e-4 if dtype == np.float64 else 2.0e-2
for ii in range(3):
for jj in range(3):
m3test = 1.0 * m3.numpy()
m3test[0, ii, jj] += dx
wp.launch(
kernel,
dim=1,
inputs=[wp.array(m3test, dtype=mat33, device=device)],
outputs=[Qout, Rout, outcomponents],
device=device,
)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
plusval = out.numpy()[0]
m3test = 1.0 * m3.numpy()
m3test[0, ii, jj] -= dx
wp.launch(
kernel,
dim=1,
inputs=[wp.array(m3test, dtype=mat33, device=device)],
outputs=[Qout, Rout, outcomponents],
device=device,
)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
minusval = out.numpy()[0]
assert_np_equal((plusval - minusval) / (2 * dx), m3grads[ii, jj], tol=fdtol)
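# test_qr checks the defining properties separately: Q orthogonal, R upper
# triangular, and Q @ R reconstructing the input. The same checks against
# NumPy's factorization (a sketch, not used by the tests):
def _np_qr_checks(a):
    q, r = np.linalg.qr(a)
    assert np.allclose(q.T @ q, np.eye(a.shape[0]))
    assert np.allclose(np.tril(r, -1), 0.0)  # strictly-lower part is zero
    assert np.allclose(q @ r, a)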
def test_eig(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 4.0e-2,
np.float32: 1.0e-5,
np.float64: 1.0e-5,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
vec3 = wp.types.vector(length=3, dtype=wptype)
mat33 = wp.types.matrix(shape=(3, 3), dtype=wptype)
def check_mat_eig(
m3: wp.array(dtype=mat33),
Qout: wp.array(dtype=mat33),
dout: wp.array(dtype=vec3),
outcomponents: wp.array(dtype=wptype),
):
Q = mat33()
d = vec3()
wp.eig3(m3[0] + wp.transpose(m3[0]), Q, d)
Qout[0] = Q
dout[0] = d
# multiply outputs by 2 so we've got something to backpropagate:
idx = 0
for i in range(3):
for j in range(3):
outcomponents[idx] = wptype(2) * Q[i, j]
idx = idx + 1
for i in range(3):
outcomponents[idx] = wptype(2) * d[i]
idx = idx + 1
kernel = getkernel(check_mat_eig, suffix=dtype.__name__)
output_select_kernel = get_select_kernel(wptype)
if register_kernels:
return
m3_np = randvals(rng, [1, 3, 3], dtype) + np.eye(3, dtype=dtype)
m3 = wp.array(m3_np, dtype=mat33, requires_grad=True, device=device)
outcomponents = wp.zeros(3 * 3 + 3, dtype=wptype, requires_grad=True, device=device)
Qout = wp.zeros(1, dtype=mat33, requires_grad=True, device=device)
dout = wp.zeros(1, dtype=vec3, requires_grad=True, device=device)
wp.launch(kernel, dim=1, inputs=[m3], outputs=[Qout, dout, outcomponents], device=device)
Qout_np = Qout.numpy()[0].astype(np.float64)
dout_np = dout.numpy()[0].astype(np.float64)
Dout_np = np.diag(dout_np)
# check Q is orthogonal:
assert_np_equal(np.matmul(Qout_np.T, Qout_np), np.eye(3), tol=tol)
# check Q contains eigenvectors:
assert_np_equal(np.matmul(Qout_np, np.matmul(Dout_np, Qout_np.T)), (m3_np[0] + m3_np[0].transpose()), tol=tol)
if dtype == np.float16:
        # skip the gradient checks for float16; the rounding errors make
        # them unreliable.
return
# check gradients:
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
idx = 0
for idx in range(len(outcomponents)):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[m3], outputs=[Qout, dout, outcomponents], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
tape.backward(out)
m3grads = 1.0 * tape.gradients[m3].numpy()[0]
tape.zero()
dx = 0.0001
fdtol = 5.0e-4 if dtype == np.float64 else 2.0e-2
for ii in range(3):
for jj in range(3):
m3test = 1.0 * m3.numpy()
m3test[0, ii, jj] += dx
wp.launch(
kernel,
dim=1,
inputs=[wp.array(m3test, dtype=mat33, device=device)],
outputs=[Qout, dout, outcomponents],
device=device,
)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
plusval = out.numpy()[0]
m3test = 1.0 * m3.numpy()
m3test[0, ii, jj] -= dx
wp.launch(
kernel,
dim=1,
inputs=[wp.array(m3test, dtype=mat33, device=device)],
outputs=[Qout, dout, outcomponents],
device=device,
)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
minusval = out.numpy()[0]
assert_np_equal((plusval - minusval) / (2 * dx), m3grads[ii, jj], tol=fdtol)
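# test_eig symmetrizes its input (m + m^T) and checks that Q @ diag(d) @ Q.T
# reconstructs it. The NumPy analogue for symmetric matrices (a sketch):
def _np_eigh_reconstruct(a_sym):
    d, q = np.linalg.eigh(a_sym)
    return q @ np.diag(d) @ q.T  # equals a_sym up to round-off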
def test_skew(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 1.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
vec3 = wp.types.vector(length=3, dtype=wptype)
output_select_kernel = get_select_kernel(wptype)
def check_mat_skew(
v3: wp.array(dtype=vec3),
outcomponents: wp.array(dtype=wptype),
):
m3result = wp.skew(v3[0])
# multiply outputs by 2 so we've got something to backpropagate:
idx = 0
for i in range(3):
for j in range(3):
outcomponents[idx] = wptype(2) * m3result[i, j]
idx = idx + 1
kernel = getkernel(check_mat_skew, suffix=dtype.__name__)
if register_kernels:
return
v3 = wp.array(randvals(rng, [1, 3], dtype), dtype=vec3, requires_grad=True, device=device)
outcomponents = wp.zeros(3 * 3, dtype=wptype, requires_grad=True, device=device)
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
wp.launch(kernel, dim=1, inputs=[v3], outputs=[outcomponents], device=device)
    # make sure skew() produces a cross-product matrix:
crossprodmat = outcomponents.numpy().reshape(3, 3)
assert_np_equal(
np.matmul(crossprodmat, np.array([1, 0, 0])).reshape(-1),
2 * np.cross(v3.numpy()[0], np.array([1, 0, 0])),
tol=tol,
)
assert_np_equal(
np.matmul(crossprodmat, np.array([0, 1, 0])).reshape(-1),
2 * np.cross(v3.numpy()[0], np.array([0, 1, 0])),
tol=tol,
)
assert_np_equal(
np.matmul(crossprodmat, np.array([0, 0, 1])).reshape(-1),
2 * np.cross(v3.numpy()[0], np.array([0, 0, 1])),
tol=tol,
)
# check it another way:
x0 = v3.numpy()[0, 0]
x1 = v3.numpy()[0, 1]
x2 = v3.numpy()[0, 2]
crossprodmat_expected = np.array(
[
[0, -x2, x1],
[x2, 0, -x0],
[-x1, x0, 0],
],
dtype=dtype,
)
assert_np_equal(crossprodmat, 2 * crossprodmat_expected, tol=tol)
if dtype in np_float_types:
idx = 0
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
for i in range(3):
for j in range(3):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[v3], outputs=[outcomponents], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
tape.backward(loss=out)
if i == j:
assert_np_equal(tape.gradients[v3].numpy()[0], np.zeros(3))
elif [i, j] == [0, 1]:
assert_np_equal(tape.gradients[v3].numpy()[0], np.array([0, 0, -2]))
elif [i, j] == [1, 0]:
assert_np_equal(tape.gradients[v3].numpy()[0], np.array([0, 0, 2]))
elif [i, j] == [0, 2]:
assert_np_equal(tape.gradients[v3].numpy()[0], np.array([0, 2, 0]))
elif [i, j] == [2, 0]:
assert_np_equal(tape.gradients[v3].numpy()[0], np.array([0, -2, 0]))
elif [i, j] == [1, 2]:
assert_np_equal(tape.gradients[v3].numpy()[0], np.array([-2, 0, 0]))
elif [i, j] == [2, 1]:
assert_np_equal(tape.gradients[v3].numpy()[0], np.array([2, 0, 0]))
tape.zero()
idx = idx + 1
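# test_skew relies on the identity skew(v) @ w == cross(v, w). A standalone
# NumPy sketch of the same construction (illustrative, not used by the tests):
def _np_skew(v):
    x0, x1, x2 = v
    return np.array(
        [
            [0.0, -x2, x1],
            [x2, 0.0, -x0],
            [-x1, x0, 0.0],
        ]
    )
# For any 3-vectors v and w: np.allclose(_np_skew(v) @ w, np.cross(v, w)).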
def test_transform_point(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 5.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
vec3 = wp.types.vector(length=3, dtype=wptype)
mat44 = wp.types.matrix(shape=(4, 4), dtype=wptype)
output_select_kernel = get_select_kernel(wptype)
def check_mat_transform_point(
v3: wp.array(dtype=vec3),
m4: wp.array(dtype=mat44),
outcomponents: wp.array(dtype=wptype),
):
# multiply outputs by 2 so we've got something to backpropagate:
presult = wptype(2) * wp.transform_point(m4[0], v3[0])
outcomponents[0] = presult[0]
outcomponents[1] = presult[1]
outcomponents[2] = presult[2]
kernel = getkernel(check_mat_transform_point, suffix=dtype.__name__)
if register_kernels:
return
v3 = wp.array(randvals(rng, [1, 3], dtype), dtype=vec3, requires_grad=True, device=device)
m4 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=True, device=device)
outcomponents = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
wp.launch(kernel, dim=1, inputs=[v3, m4], outputs=[outcomponents], device=device)
v3homog = np.ones(4, dtype=dtype)
v3homog[:3] = v3.numpy()[0]
assert_np_equal(outcomponents.numpy(), 2 * np.matmul(m4.numpy()[0], v3homog)[:3], tol=10 * tol)
if dtype in np_float_types:
for j in range(3):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[v3, m4], outputs=[outcomponents], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, j], outputs=[out], device=device)
tape.backward(loss=out)
assert_np_equal(2 * m4.numpy()[0, j, :3], tape.gradients[v3].numpy(), tol=tol)
expected = np.zeros((4, 4), dtype=dtype)
expected[j, :3] = 2 * v3.numpy()
expected[j, 3] = 2
assert_np_equal(tape.gradients[m4].numpy(), expected, tol=tol)
tape.zero()
def test_transform_vector(test, device, dtype, register_kernels=False):
rng = np.random.default_rng(123)
tol = {
np.float16: 5.0e-3,
np.float32: 1.0e-6,
np.float64: 1.0e-8,
}.get(dtype, 0)
wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
vec3 = wp.types.vector(length=3, dtype=wptype)
mat44 = wp.types.matrix(shape=(4, 4), dtype=wptype)
output_select_kernel = get_select_kernel(wptype)
def check_mat_transform_vector(
v3: wp.array(dtype=vec3),
m4: wp.array(dtype=mat44),
outcomponents: wp.array(dtype=wptype),
):
# multiply outputs by 2 so we've got something to backpropagate:
presult = wptype(2) * wp.transform_vector(m4[0], v3[0])
outcomponents[0] = presult[0]
outcomponents[1] = presult[1]
outcomponents[2] = presult[2]
kernel = getkernel(check_mat_transform_vector, suffix=dtype.__name__)
if register_kernels:
return
v3 = wp.array(randvals(rng, [1, 3], dtype), dtype=vec3, requires_grad=True, device=device)
m4 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=True, device=device)
outcomponents = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
wp.launch(kernel, dim=1, inputs=[v3, m4], outputs=[outcomponents], device=device)
v3homog = np.zeros(4, dtype=dtype)
v3homog[:3] = v3.numpy()[0]
assert_np_equal(outcomponents.numpy(), 2 * np.matmul(m4.numpy()[0], v3homog)[:3], tol=10 * tol)
if dtype in np_float_types:
for j in range(3):
tape = wp.Tape()
with tape:
wp.launch(kernel, dim=1, inputs=[v3, m4], outputs=[outcomponents], device=device)
wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, j], outputs=[out], device=device)
tape.backward(loss=out)
assert_np_equal(2 * m4.numpy()[0, j, :3], tape.gradients[v3].numpy(), tol=tol)
expected = np.zeros((4, 4), dtype=dtype)
expected[j, :3] = 2 * v3.numpy()
assert_np_equal(tape.gradients[m4].numpy(), expected, tol=tol)
tape.zero()
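# transform_point and transform_vector differ only in the homogeneous
# coordinate: points are extended with w = 1 (translation applies), vectors
# with w = 0 (translation drops out). A NumPy sketch of both (illustrative):
def _np_transform(m4, v3, w):
    return (m4 @ np.append(v3, w))[:3]
# _np_transform(m, v, 1.0) mirrors wp.transform_point(m, v);
# _np_transform(m, v, 0.0) mirrors wp.transform_vector(m, v).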
# Test matrix constructors using explicit type (float16).
# Note that these tests specifically do not use generics / closure args to
# create kernels dynamically (unlike the rest of this file), as those use
# different code paths to resolve arg types, which have led to regressions.
@wp.kernel
def test_constructors_explicit_precision():
# construction for custom matrix types
eye = wp.identity(dtype=wp.float16, n=2)
zeros = wp.matrix(shape=(2, 2), dtype=wp.float16)
custom = wp.matrix(wp.float16(0.0), wp.float16(1.0), wp.float16(2.0), wp.float16(3.0), shape=(2, 2))
for i in range(2):
for j in range(2):
if i == j:
wp.expect_eq(eye[i, j], wp.float16(1.0))
else:
wp.expect_eq(eye[i, j], wp.float16(0.0))
wp.expect_eq(zeros[i, j], wp.float16(0.0))
wp.expect_eq(custom[i, j], wp.float16(i) * wp.float16(2.0) + wp.float16(j))
mat32d = wp.mat(shape=(3, 2), dtype=wp.float64)
@wp.kernel
def test_matrix_constructor_value_func():
a = wp.mat22()
b = wp.matrix(a, shape=(2, 2))
c = mat32d()
d = mat32d(c, shape=(3, 2))
e = mat32d(wp.float64(1.0), wp.float64(2.0), wp.float64(1.0), wp.float64(2.0), wp.float64(1.0), wp.float64(2.0))
f = mat32d(
wp.vec3d(wp.float64(1.0), wp.float64(2.0), wp.float64(3.0)),
wp.vec3d(wp.float64(1.0), wp.float64(2.0), wp.float64(3.0)),
)
# Same as above but with a default (float/int) type, which exercises some
# different code paths that need to ensure types are correctly canonicalized
# during codegen.
@wp.kernel
def test_constructors_default_precision():
# construction for default (float) matrix types
eye = wp.identity(dtype=float, n=2)
zeros = wp.matrix(shape=(2, 2), dtype=float)
custom = wp.matrix(0.0, 1.0, 2.0, 3.0, shape=(2, 2))
for i in range(2):
for j in range(2):
if i == j:
wp.expect_eq(eye[i, j], 1.0)
else:
wp.expect_eq(eye[i, j], 0.0)
wp.expect_eq(zeros[i, j], 0.0)
wp.expect_eq(custom[i, j], float(i) * 2.0 + float(j))
@wp.kernel
def test_matrix_mutation(expected: wp.types.matrix(shape=(10, 3), dtype=float)):
m = wp.matrix(shape=(10, 3), dtype=float)
# test direct element indexing
m[0, 0] = 1.0
m[0, 1] = 2.0
m[0, 2] = 3.0
    # The nested indexing (matrix->vector->scalar) below does not currently
    # modify m because m[0] returns the row vector by value rather than by
    # reference. This differs from NumPy, which always returns by reference.
    # It is unclear how to support reference semantics together with auto-diff.
# m[0][1] = 2.0
# m[0][2] = 3.0
# test setting rows
for i in range(1, 10):
m[i] = m[i - 1] + wp.vec3(1.0, 2.0, 3.0)
wp.expect_eq(m, expected)
CONSTANT_SHAPE_ROWS = wp.constant(10)
CONSTANT_SHAPE_COLS = wp.constant(10)
# Tests that global constants can be used in the shape keyword argument of
# the matrix constructor.
@wp.kernel
def test_constructors_constant_shape():
m = wp.matrix(shape=(CONSTANT_SHAPE_ROWS, CONSTANT_SHAPE_COLS), dtype=float)
for i in range(CONSTANT_SHAPE_ROWS):
for j in range(CONSTANT_SHAPE_COLS):
m[i, j] = float(i * j)
devices = get_test_devices()
class TestMat(unittest.TestCase):
pass
add_kernel_test(TestMat, test_constructors_explicit_precision, dim=1, devices=devices)
add_kernel_test(TestMat, test_constructors_default_precision, dim=1, devices=devices)
add_kernel_test(TestMat, test_constructors_constant_shape, dim=1, devices=devices)
add_kernel_test(TestMat, test_matrix_constructor_value_func, dim=1, devices=devices)
mat103 = wp.types.matrix(shape=(10, 3), dtype=float)
add_kernel_test(
TestMat,
test_matrix_mutation,
dim=1,
inputs=[
        mat103(
            # fmt: off
            1.0, 2.0, 3.0,
            2.0, 4.0, 6.0,
            3.0, 6.0, 9.0,
            4.0, 8.0, 12.0,
            5.0, 10.0, 15.0,
            6.0, 12.0, 18.0,
            7.0, 14.0, 21.0,
            8.0, 16.0, 24.0,
            9.0, 18.0, 27.0,
            10.0, 20.0, 30.0,
            # fmt: on
        )
],
devices=devices,
)
for dtype in np_signed_int_types + np_float_types:
add_function_test_register_kernel(
TestMat, f"test_negation_{dtype.__name__}", test_negation, devices=devices, dtype=dtype
)
add_function_test_register_kernel(
TestMat, f"test_subtraction_{dtype.__name__}", test_subtraction, devices=devices, dtype=dtype
)
add_function_test(
TestMat,
"test_anon_constructor_error_shape_keyword_missing",
test_anon_constructor_error_shape_keyword_missing,
devices=devices,
)
add_function_test(
TestMat,
"test_anon_constructor_error_dtype_keyword_missing",
test_anon_constructor_error_dtype_keyword_missing,
devices=devices,
)
add_function_test(
TestMat,
"test_anon_constructor_error_shape_mismatch",
test_anon_constructor_error_shape_mismatch,
devices=devices,
)
add_function_test(
TestMat,
"test_anon_constructor_error_invalid_arg_count",
test_anon_constructor_error_invalid_arg_count,
devices=devices,
)
add_function_test(
TestMat,
"test_tpl_constructor_error_incompatible_sizes",
test_tpl_constructor_error_incompatible_sizes,
devices=devices,
)
add_function_test(
TestMat,
"test_tpl_constructor_error_invalid_scalar_type",
test_tpl_constructor_error_invalid_scalar_type,
devices=devices,
)
add_function_test(
TestMat,
"test_tpl_constructor_error_invalid_vector_count",
test_tpl_constructor_error_invalid_vector_count,
devices=devices,
)
add_function_test(
TestMat,
"test_tpl_constructor_error_invalid_vector_shape",
test_tpl_constructor_error_invalid_vector_shape,
devices=devices,
)
add_function_test(
TestMat,
"test_tpl_constructor_error_invalid_arg_count",
test_tpl_constructor_error_invalid_arg_count,
devices=devices,
)
add_function_test(TestMat, "test_tpl_ops_with_anon", test_tpl_ops_with_anon)
for dtype in np_float_types:
add_function_test(
TestMat, f"test_py_arithmetic_ops_{dtype.__name__}", test_py_arithmetic_ops, devices=None, dtype=dtype
)
add_function_test_register_kernel(
TestMat, f"test_quat_constructor_{dtype.__name__}", test_quat_constructor, devices=devices, dtype=dtype
)
add_function_test_register_kernel(
TestMat, f"test_inverse_{dtype.__name__}", test_inverse, devices=devices, dtype=dtype
)
add_function_test_register_kernel(TestMat, f"test_svd_{dtype.__name__}", test_svd, devices=devices, dtype=dtype)
add_function_test_register_kernel(TestMat, f"test_qr_{dtype.__name__}", test_qr, devices=devices, dtype=dtype)
add_function_test_register_kernel(TestMat, f"test_eig_{dtype.__name__}", test_eig, devices=devices, dtype=dtype)
add_function_test_register_kernel(
TestMat, f"test_transform_point_{dtype.__name__}", test_transform_point, devices=devices, dtype=dtype
)
add_function_test_register_kernel(
TestMat, f"test_transform_vector_{dtype.__name__}", test_transform_vector, devices=devices, dtype=dtype
)
add_function_test_register_kernel(
TestMat, f"test_determinant_{dtype.__name__}", test_determinant, devices=devices, dtype=dtype
)
add_function_test_register_kernel(TestMat, f"test_skew_{dtype.__name__}", test_skew, devices=devices, dtype=dtype)
if __name__ == "__main__":
wp.build.clear_kernel_cache()
unittest.main(verbosity=2, failfast=True)