# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
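"""Tests for 16-bit floating point (fp16) support in Warp: array conversion,
passing fp16 values as kernel parameters, and gradient propagation."""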
import unittest

import numpy as np

import warp as wp
from warp.tests.unittest_utils import *

wp.init()
@wp.kernel
def load_store_half(f32: wp.array(dtype=wp.float32), f16: wp.array(dtype=wp.float16)):
    tid = wp.tid()

    # check conversion from f32 -> f16
    a = wp.float16(f32[tid])
    b = f16[tid]

    wp.expect_eq(a, b)

    # check stores
    f16[tid] = a
def test_fp16_conversion(test, device):
    """Test conversion between fp32 and fp16 arrays, and fp16 stores."""
    s = [1.0, 2.0, 3.0, -3.14159]

    np_f32 = np.array(s, dtype=np.float32)
    np_f16 = np.array(s, dtype=np.float16)

    wp_f32 = wp.array(s, dtype=wp.float32, device=device)
    wp_f16 = wp.array(s, dtype=wp.float16, device=device)

    assert_np_equal(np_f32, wp_f32.numpy())
    assert_np_equal(np_f16, wp_f16.numpy())

    wp.launch(load_store_half, dim=len(s), inputs=[wp_f32, wp_f16], device=device)

    # check that stores worked
    assert_np_equal(np_f16, wp_f16.numpy())
@wp.kernel
def value_load_store_half(f16_value: wp.float16, f16_array: wp.array(dtype=wp.float16)):
    wp.expect_eq(f16_value, f16_array[0])

    # check stores
    f16_array[0] = f16_value
def test_fp16_kernel_parameter(test, device):
    """Test the ability to pass fp16 values into kernels as parameters."""
    s = [1.0, 2.0, 3.0, -3.14159]

    for test_val in s:
        np_f16 = np.array([test_val], dtype=np.float16)
        wp_f16 = wp.array([test_val], dtype=wp.float16, device=device)

        wp.launch(value_load_store_half, (1,), inputs=[wp.float16(test_val), wp_f16], device=device)

        # check that stores worked
        assert_np_equal(np_f16, wp_f16.numpy())

        # do the same, but pass test_val as a Python float to test automatic conversion
        wp_f16 = wp.array([test_val], dtype=wp.float16, device=device)

        wp.launch(value_load_store_half, (1,), inputs=[test_val, wp_f16], device=device)

        assert_np_equal(np_f16, wp_f16.numpy())
@wp.kernel
def mul_half(input: wp.array(dtype=wp.float16), output: wp.array(dtype=wp.float16)):
    tid = wp.tid()

    # convert to compute type fp32
    x = wp.float(input[tid]) * 2.0

    # store back as fp16
    output[tid] = wp.float16(x)
def test_fp16_grad(test, device):
    rng = np.random.default_rng(123)

    # checks that gradients are correctly propagated for fp16 arrays,
    # even when intermediate calculations are performed in, e.g., fp32
    s = rng.random(size=15).astype(np.float16)

    input = wp.array(s, dtype=wp.float16, device=device, requires_grad=True)
    output = wp.zeros_like(input)

    tape = wp.Tape()
    with tape:
        wp.launch(mul_half, dim=len(s), inputs=[input, output], device=device)

    ones = wp.array(np.ones(len(output)), dtype=wp.float16, device=device)

    tape.backward(grads={output: ones})

    # output = 2 * input, so the gradient w.r.t. input is 2 everywhere
    assert_np_equal(input.grad.numpy(), np.ones(len(s)) * 2.0)
class TestFp16(unittest.TestCase):
    pass


devices = []
if wp.is_cpu_available():
    devices.append("cpu")
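# restrict the CUDA tests to devices of compute capability 7.0 (Volta)
# or newer, on the assumption that fp16 support is incomplete on older
# architectures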
for cuda_device in get_unique_cuda_test_devices():
    if cuda_device.arch >= 70:
        devices.append(cuda_device)
add_function_test(TestFp16, "test_fp16_conversion", test_fp16_conversion, devices=devices)
add_function_test(TestFp16, "test_fp16_grad", test_fp16_grad, devices=devices)
add_function_test(TestFp16, "test_fp16_kernel_parameter", test_fp16_kernel_parameter, devices=devices)


if __name__ == "__main__":
    wp.build.clear_kernel_cache()
    unittest.main(verbosity=2)