Spaces:

qbhf2
/

GarmentCode

Sleeping

App Files Files Community

GarmentCode / NvidiaWarp-GarmentCode /warp /tests /test_large.py

qbhf2

added NvidiaWarp and GarmentCode repos

66c9c8a 11 months ago

raw

history blame contribute delete

4.74 kB

	# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
	# NVIDIA CORPORATION and its licensors retain all intellectual property
	# and proprietary rights in and to this software, related documentation
	# and any modifications thereto. Any use, reproduction, disclosure or
	# distribution of this software and related documentation without an express
	# license agreement from NVIDIA CORPORATION is strictly prohibited.

	import math
	import unittest

	import warp as wp
	from warp.tests.unittest_utils import *

	wp.init()


	@wp.kernel
	def conditional_sum(result: wp.array(dtype=wp.uint64)):
	    # Kernel intended for 3-D launches: wp.tid() is unpacked into three indices.
	    # Only threads whose first index is 0 contribute, so the final value of
	    # result[0] is the number of threads in the i == 0 slice of the launch grid.
	    plane, row, col = wp.tid()

	    # Unit increment, typed to match the uint64 accumulator.
	    one = wp.uint64(1)

	    if plane == 0:
	        # Atomic add because many threads update the same element concurrently.
	        wp.atomic_add(result, 0, one)


	def test_large_launch_large_kernel(test, device):
	    """Check tid() on a 3-D launch of 2 x 2**16 x 2**16 (= 2**33) threads.

	    The conditional_sum kernel adds 1 to the result for every thread whose
	    first index is 0, so exactly half of the launched threads are counted.
	    Due to the size of the grid, this test is not run on CPUs.

	    Args:
	        test: Test-case object providing assertEqual.
	        device: Warp device on which to allocate the counter and launch the kernel.
	    """
	    side = 2**16
	    launch_dim = [2, side, side]
	    # Only the i == 0 slice of the grid contributes: side * side threads.
	    expected_count = side * side

	    counter = wp.zeros(shape=(1,), dtype=wp.uint64, device=device)
	    wp.launch(kernel=conditional_sum, dim=launch_dim, inputs=[counter], device=device)

	    observed_count = counter.numpy()[0]
	    test.assertEqual(observed_count, expected_count)


	@wp.kernel
	def count_elements(result: wp.array(dtype=wp.uint64)):
	    # Kernel: every launched thread atomically adds 1 to result[0], so after the
	    # launch result[0] holds the total number of threads that executed.
	    one = wp.uint64(1)
	    wp.atomic_add(result, 0, one)


	def test_large_launch_max_blocks(test, device):
	    """Check that launches capped with max_blocks=1 still visit every element.

	    Args:
	        test: Test-case object providing assertEqual.
	        device: Warp device on which to allocate the counter and launch the kernels.
	    """
	    counter = wp.zeros(shape=(1,), dtype=wp.uint64, device=device)

	    # 1-D case: 1000 elements processed by a launch limited to a single block,
	    # so the threads must loop to cover all of them.
	    wp.launch(count_elements, (1000,), inputs=[counter], max_blocks=1, device=device)
	    test.assertEqual(counter.numpy()[0], 1000)

	    # 3-D case: 2x50x10 elements, again limited to a single block. The kernel
	    # uses the tid() index to count only the i == 0 half, i.e. 50 * 10 = 500.
	    counter.zero_()
	    grid_3d = (2, 50, 10)
	    wp.launch(conditional_sum, grid_3d, inputs=[counter], max_blocks=1, device=device)
	    test.assertEqual(counter.numpy()[0], 500)


	def test_large_launch_very_large_kernel(test, device):
	    """Launch a 1-D kernel with more threads than a single CUDA grid dimension allows.

	    Every thread adds 1 to a shared counter, so the final count must equal the
	    launch dimension. Due to the size of the grid, this test is not run on CPUs.

	    Args:
	        test: Test-case object providing assertEqual.
	        device: Warp device on which to allocate the counter and launch the kernel.
	    """
	    # Dim is chosen to be larger than the maximum CUDA one-dimensional grid size
	    # (total threads): 2**31 - 1 blocks along x, times 256 threads per block
	    # (presumably Warp's block size -- confirm), plus one extra thread.
	    dim = (2**31 - 1) * 256 + 1

	    test_result = wp.zeros(shape=(1,), dtype=wp.uint64, device=device)
	    wp.launch(count_elements, (dim,), inputs=[test_result], device=device)
	    test.assertEqual(test_result.numpy()[0], dim)


	def test_large_arrays_slow(test, device):
	    """Exercise zero_/fill_ on 1-D to 4-D arrays holding more than 2**31 - 1 elements.

	    The arrays are just large enough to expose flaws in handling element counts
	    that overflow a signed 32-bit integer. The test takes a long time to run, so
	    it is not meant to be run automatically without support for controlling how
	    frequently a test is run.

	    Args:
	        test: Test-case object (unused here; checks go through assert_np_equal).
	        device: Warp device on which the arrays are allocated.
	    """
	    total_elements = 2**31 + 8

	    # 1-D to 4-D arrays: test zero_, fill_, then zero_ for scalar data types
	    for ndim in range(1, 5):
	        # Smallest equal side length whose ndim-th power reaches total_elements.
	        side = math.ceil(total_elements ** (1 / ndim))
	        shape = (side,) * ndim

	        for wptype in wp.types.np_dtype_to_warp_type.values():
	            arr = wp.zeros(shape, dtype=wptype, device=device)
	            host = arr.numpy()
	            assert_np_equal(host, np.zeros_like(host))

	            arr.fill_(127)
	            host = arr.numpy()
	            assert_np_equal(host, 127 * np.ones_like(host))

	            arr.zero_()
	            host = arr.numpy()
	            assert_np_equal(host, np.zeros_like(host))


	def test_large_arrays_fast(test, device):
	    """Run a truncated version of test_large_arrays_slow to catch basic errors.

	    Only a single 1-D int8 array with 2**31 + 8 elements is checked through
	    zero-initialization, fill_(127) and zero_().

	    Args:
	        test: Test-case object (unused here; checks go through assert_np_equal).
	        device: Warp device on which the array is allocated.
	    """
	    element_count = 2**31 + 8

	    # Look up the Warp type that corresponds to NumPy's int8.
	    int8_dtype = np.dtype(np.int8)
	    warp_int8 = wp.types.np_dtype_to_warp_type[int8_dtype]

	    arr = wp.zeros((element_count,), dtype=warp_int8, device=device)
	    host = arr.numpy()
	    assert_np_equal(host, np.zeros_like(host))

	    arr.fill_(127)
	    host = arr.numpy()
	    assert_np_equal(host, 127 * np.ones_like(host))

	    arr.zero_()
	    host = arr.numpy()
	    assert_np_equal(host, np.zeros_like(host))


	devices = get_test_devices()


	class TestLarge(unittest.TestCase):
	    """Test case that receives the large-launch and large-array tests registered below."""


	# The two huge-grid launch tests are registered for CUDA devices only; their
	# docstrings note that the grids are too large to run on CPUs.
	add_function_test(
	    TestLarge,
	    "test_large_launch_large_kernel",
	    test_large_launch_large_kernel,
	    devices=get_unique_cuda_test_devices(),
	)

	add_function_test(
	    TestLarge,
	    "test_large_launch_max_blocks",
	    test_large_launch_max_blocks,
	    devices=devices,
	)
	add_function_test(
	    TestLarge,
	    "test_large_launch_very_large_kernel",
	    test_large_launch_very_large_kernel,
	    devices=get_unique_cuda_test_devices(),
	)

	# Only the fast array test is registered; test_large_arrays_slow is not added
	# here because it takes too long to run automatically.
	add_function_test(
	    TestLarge,
	    "test_large_arrays_fast",
	    test_large_arrays_fast,
	    devices=devices,
	)


	if __name__ == "__main__":
	    # When run as a script: clear Warp's kernel cache first, then run the
	    # tests with verbose per-test output.
	    wp.build.clear_kernel_cache()
	    unittest.main(verbosity=2)