# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
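"""Tests for 16-bit floating point (fp16) support in Warp: array conversion,
passing fp16 values as kernel parameters, and gradient propagation."""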
import unittest

import numpy as np

import warp as wp
from warp.tests.unittest_utils import *

wp.init()
@wp.kernel
def load_store_half(f32: wp.array(dtype=wp.float32), f16: wp.array(dtype=wp.float16)):
    tid = wp.tid()

    # check conversion from f32 -> f16
    a = wp.float16(f32[tid])
    b = f16[tid]

    wp.expect_eq(a, b)

    # check stores
    f16[tid] = a
def test_fp16_conversion(test, device):
    """Test conversion between fp32 and fp16 arrays, and fp16 stores."""
    s = [1.0, 2.0, 3.0, -3.14159]

    np_f32 = np.array(s, dtype=np.float32)
    np_f16 = np.array(s, dtype=np.float16)

    wp_f32 = wp.array(s, dtype=wp.float32, device=device)
    wp_f16 = wp.array(s, dtype=wp.float16, device=device)

    assert_np_equal(np_f32, wp_f32.numpy())
    assert_np_equal(np_f16, wp_f16.numpy())

    wp.launch(load_store_half, dim=len(s), inputs=[wp_f32, wp_f16], device=device)

    # check that stores worked
    assert_np_equal(np_f16, wp_f16.numpy())
@wp.kernel
def value_load_store_half(f16_value: wp.float16, f16_array: wp.array(dtype=wp.float16)):
    wp.expect_eq(f16_value, f16_array[0])

    # check stores
    f16_array[0] = f16_value
def test_fp16_kernel_parameter(test, device):
    """Test the ability to pass fp16 values into kernels as parameters."""
    s = [1.0, 2.0, 3.0, -3.14159]

    for test_val in s:
        np_f16 = np.array([test_val], dtype=np.float16)
        wp_f16 = wp.array([test_val], dtype=wp.float16, device=device)

        wp.launch(value_load_store_half, (1,), inputs=[wp.float16(test_val), wp_f16], device=device)

        # check that stores worked
        assert_np_equal(np_f16, wp_f16.numpy())

        # do the same, but pass test_val as a Python float to test automatic conversion
        wp_f16 = wp.array([test_val], dtype=wp.float16, device=device)

        wp.launch(value_load_store_half, (1,), inputs=[test_val, wp_f16], device=device)

        assert_np_equal(np_f16, wp_f16.numpy())
@wp.kernel
def mul_half(input: wp.array(dtype=wp.float16), output: wp.array(dtype=wp.float16)):
    tid = wp.tid()

    # convert to compute type fp32
    x = wp.float(input[tid]) * 2.0

    # store back as fp16
    output[tid] = wp.float16(x)
def test_fp16_grad(test, device):
    rng = np.random.default_rng(123)

    # checks that gradients are correctly propagated for fp16 arrays,
    # even when intermediate calculations are performed in, e.g., fp32
    s = rng.random(size=15).astype(np.float16)

    input = wp.array(s, dtype=wp.float16, device=device, requires_grad=True)
    output = wp.zeros_like(input)

    tape = wp.Tape()
    with tape:
        wp.launch(mul_half, dim=len(s), inputs=[input, output], device=device)

    ones = wp.array(np.ones(len(output)), dtype=wp.float16, device=device)

    tape.backward(grads={output: ones})

    # output = 2 * input, so the gradient w.r.t. input is 2 everywhere
    assert_np_equal(input.grad.numpy(), np.ones(len(s)) * 2.0)
class TestFp16(unittest.TestCase):
    pass


devices = []
if wp.is_cpu_available():
    devices.append("cpu")
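# restrict the CUDA tests to devices of compute capability 7.0 (Volta)
# or newer, on the assumption that fp16 support is incomplete on older
# architectures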
for cuda_device in get_unique_cuda_test_devices():
    if cuda_device.arch >= 70:
        devices.append(cuda_device)
add_function_test(TestFp16, "test_fp16_conversion", test_fp16_conversion, devices=devices)
add_function_test(TestFp16, "test_fp16_grad", test_fp16_grad, devices=devices)
add_function_test(TestFp16, "test_fp16_kernel_parameter", test_fp16_kernel_parameter, devices=devices)


if __name__ == "__main__":
    wp.build.clear_kernel_cache()
    unittest.main(verbosity=2)