# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import unittest
import numpy as np
import warp as wp
from warp.tests.unittest_utils import *
# Initialize the Warp runtime before any kernels are defined or launched.
wp.init()
@wp.kernel
def load_store_half(f32: wp.array(dtype=wp.float32), f16: wp.array(dtype=wp.float16)):
    # Per-thread: convert one fp32 element to fp16, compare it against the
    # pre-converted fp16 array, then write it back to exercise fp16 stores.
    tid = wp.tid()

    # check conversion from f32->f16
    converted = wp.float16(f32[tid])
    expected = f16[tid]
    wp.expect_eq(converted, expected)

    # check stores
    f16[tid] = converted
def test_fp16_conversion(test, device):
    """Check that fp32/fp16 conversions agree between NumPy and Warp,
    and that fp16 stores inside a kernel round-trip correctly."""
    values = [1.0, 2.0, 3.0, -3.14159]

    np_f32 = np.array(values, dtype=np.float32)
    np_f16 = np.array(values, dtype=np.float16)

    wp_f32 = wp.array(values, dtype=wp.float32, device=device)
    wp_f16 = wp.array(values, dtype=wp.float16, device=device)

    # host-side conversions must match NumPy's
    assert_np_equal(np_f32, wp_f32.numpy())
    assert_np_equal(np_f16, wp_f16.numpy())

    wp.launch(load_store_half, dim=len(values), inputs=[wp_f32, wp_f16], device=device)

    # check that stores worked
    assert_np_equal(np_f16, wp_f16.numpy())
@wp.kernel
def value_load_store_half(f16_value: wp.float16, f16_array: wp.array(dtype=wp.float16)):
    # The fp16 scalar passed in as a kernel parameter must equal the
    # first element of the fp16 array.
    wp.expect_eq(f16_value, f16_array[0])

    # check stores
    f16_array[0] = f16_value
def test_fp16_kernel_parameter(test, device):
    """Test the ability to pass in fp16 into kernels as parameters"""
    for value in (1.0, 2.0, 3.0, -3.14159):
        np_f16 = np.array([value], dtype=np.float16)

        # pass the scalar explicitly wrapped as wp.float16
        wp_f16 = wp.array([value], dtype=wp.float16, device=device)
        wp.launch(value_load_store_half, (1,), inputs=[wp.float16(value), wp_f16], device=device)
        # check that stores worked
        assert_np_equal(np_f16, wp_f16.numpy())

        # Do the same thing but pass in a plain Python float to test automatic conversion
        wp_f16 = wp.array([value], dtype=wp.float16, device=device)
        wp.launch(value_load_store_half, (1,), inputs=[value, wp_f16], device=device)
        assert_np_equal(np_f16, wp_f16.numpy())
@wp.kernel
def mul_half(input: wp.array(dtype=wp.float16), output: wp.array(dtype=wp.float16)):
    # Doubles each fp16 element, performing the arithmetic in fp32.
    tid = wp.tid()

    # promote to the fp32 compute type for the intermediate math
    doubled = wp.float(input[tid]) * 2.0

    # narrow back down to fp16 for storage
    output[tid] = wp.float16(doubled)
def test_fp16_grad(test, device):
    """Check that gradients are correctly propagated through fp16 arrays,
    even when intermediate calculations are performed in e.g. fp32."""
    rng = np.random.default_rng(123)

    samples = rng.random(size=15).astype(np.float16)

    input = wp.array(samples, dtype=wp.float16, device=device, requires_grad=True)
    output = wp.zeros_like(input)

    tape = wp.Tape()
    with tape:
        wp.launch(mul_half, dim=len(samples), inputs=[input, output], device=device)

    # seed the backward pass with dL/d(output) == 1
    ones = wp.array(np.ones(len(output)), dtype=wp.float16, device=device)
    tape.backward(grads={output: ones})

    # mul_half computes 2*x, so d/dx is 2 everywhere
    assert_np_equal(input.grad.numpy(), np.ones(len(samples)) * 2.0)
class TestFp16(unittest.TestCase):
    """Empty container class; test methods are attached below via add_function_test."""
# Build the device list: CPU if available, plus CUDA devices that can run fp16.
devices = ["cpu"] if wp.is_cpu_available() else []

# arch >= 70 — presumably the sm_70 (Volta) requirement for fp16 device code;
# older GPUs are skipped.
devices.extend(d for d in get_unique_cuda_test_devices() if d.arch >= 70)

add_function_test(TestFp16, "test_fp16_conversion", test_fp16_conversion, devices=devices)
add_function_test(TestFp16, "test_fp16_grad", test_fp16_grad, devices=devices)
add_function_test(TestFp16, "test_fp16_kernel_parameter", test_fp16_kernel_parameter, devices=devices)
if __name__ == "__main__":
    # Start from a clean kernel cache so stale codegen cannot mask failures.
    wp.build.clear_kernel_cache()
    unittest.main(verbosity=2)
|