qbhf2's picture
added NvidiaWarp and GarmentCode repos
66c9c8a
# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
###########################################################################
# Bechmarks for kernel launches with different types of args
###########################################################################
import warp as wp
@wp.struct
class S0:
pass
@wp.struct
class Sf:
x: float
y: float
z: float
@wp.struct
class Sv:
u: wp.vec3
v: wp.vec3
w: wp.vec3
@wp.struct
class Sm:
M: wp.mat33
N: wp.mat33
O: wp.mat33
@wp.struct
class Sa:
a: wp.array(dtype=float)
b: wp.array(dtype=float)
c: wp.array(dtype=float)
@wp.struct
class Sz:
a: wp.array(dtype=float)
b: wp.array(dtype=float)
c: wp.array(dtype=float)
x: float
y: float
z: float
u: wp.vec3
v: wp.vec3
w: wp.vec3
@wp.kernel
def k0():
tid = wp.tid()
@wp.kernel
def kf(x: float, y: float, z: float):
tid = wp.tid()
@wp.kernel
def kv(u: wp.vec3, v: wp.vec3, w: wp.vec3):
tid = wp.tid()
@wp.kernel
def km(M: wp.mat33, N: wp.mat33, O: wp.mat33):
tid = wp.tid()
@wp.kernel
def ka(a: wp.array(dtype=float), b: wp.array(dtype=float), c: wp.array(dtype=float)):
tid = wp.tid()
@wp.kernel
def kz(
a: wp.array(dtype=float),
b: wp.array(dtype=float),
c: wp.array(dtype=float),
x: float,
y: float,
z: float,
u: wp.vec3,
v: wp.vec3,
w: wp.vec3,
):
tid = wp.tid()
@wp.kernel
def ks0(s: S0):
tid = wp.tid()
@wp.kernel
def ksf(s: Sf):
tid = wp.tid()
@wp.kernel
def ksv(s: Sv):
tid = wp.tid()
@wp.kernel
def ksm(s: Sm):
tid = wp.tid()
@wp.kernel
def ksa(s: Sa):
tid = wp.tid()
@wp.kernel
def ksz(s: Sz):
tid = wp.tid()
wp.init()
wp.build.clear_kernel_cache()
devices = wp.get_devices()
num_launches = 100000
for device in devices:
with wp.ScopedDevice(device):
print(f"\n=================== Device '{device}' ===================")
wp.force_load(device)
n = 1
a = wp.zeros(n, dtype=float)
b = wp.zeros(n, dtype=float)
c = wp.zeros(n, dtype=float)
x = 17.0
y = 42.0
z = 99.0
u = wp.vec3(1, 2, 3)
v = wp.vec3(10, 20, 30)
w = wp.vec3(100, 200, 300)
M = wp.mat33()
N = wp.mat33()
O = wp.mat33()
s0 = S0()
sf = Sf()
sf.x = x
sf.y = y
sf.z = z
sv = Sv()
sv.u = u
sv.v = v
sv.w = w
sm = Sm()
sm.M = M
sm.N = N
sm.O = O
sa = Sa()
sa.a = a
sa.b = b
sa.c = c
sz = Sz()
sz.a = a
sz.b = b
sz.c = c
sz.x = x
sz.y = y
sz.z = z
sz.u = u
sz.v = v
sz.w = w
tk0 = wp.ScopedTimer("k0")
tkf = wp.ScopedTimer("kf")
tkv = wp.ScopedTimer("kv")
tkm = wp.ScopedTimer("km")
tka = wp.ScopedTimer("ka")
tkz = wp.ScopedTimer("kz")
ts0 = wp.ScopedTimer("s0")
tsf = wp.ScopedTimer("sf")
tsv = wp.ScopedTimer("sv")
tsm = wp.ScopedTimer("sm")
tsa = wp.ScopedTimer("sa")
tsz = wp.ScopedTimer("sz")
wp.synchronize_device()
with tk0:
for _ in range(num_launches):
wp.launch(k0, dim=1, inputs=[])
wp.synchronize_device()
with tkf:
for _ in range(num_launches):
wp.launch(kf, dim=1, inputs=[x, y, z])
wp.synchronize_device()
with tkv:
for _ in range(num_launches):
wp.launch(kv, dim=1, inputs=[u, v, w])
wp.synchronize_device()
with tkm:
for _ in range(num_launches):
wp.launch(km, dim=1, inputs=[M, N, O])
wp.synchronize_device()
with tka:
for _ in range(num_launches):
wp.launch(ka, dim=1, inputs=[a, b, c])
wp.synchronize_device()
with tkz:
for _ in range(num_launches):
wp.launch(kz, dim=1, inputs=[a, b, c, x, y, z, u, v, w])
# structs
wp.synchronize_device()
with ts0:
for _ in range(num_launches):
wp.launch(ks0, dim=1, inputs=[s0])
wp.synchronize_device()
with tsf:
for _ in range(num_launches):
wp.launch(ksf, dim=1, inputs=[sf])
wp.synchronize_device()
with tsv:
for _ in range(num_launches):
wp.launch(ksv, dim=1, inputs=[sv])
wp.synchronize_device()
with tsm:
for _ in range(num_launches):
wp.launch(ksm, dim=1, inputs=[sm])
wp.synchronize_device()
with tsa:
for _ in range(num_launches):
wp.launch(ksa, dim=1, inputs=[sa])
wp.synchronize_device()
with tsz:
for _ in range(num_launches):
wp.launch(ksz, dim=1, inputs=[sz])
wp.synchronize_device()
timers = [
[tk0, ts0],
[tkf, tsf],
[tkv, tsv],
[tkm, tsm],
[tka, tsa],
[tkz, tsz],
]
print("--------------------------------")
print(f"| args | direct | struct |")
print("--------------------------------")
for tk, ts in timers:
print(f"| {tk.name} |{tk.elapsed:10.0f} |{ts.elapsed:10.0f} |")
print("--------------------------------")