File size: 18,494 Bytes
66c9c8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
# Copyright (c) 2023 NVIDIA CORPORATION.  All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.  Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

"""Sample node that simulates flocking behaviors by animating prim attributes."""

import math
import traceback
import numpy as np

import carb.settings
import omni.kit.app
import omni.graph.core as og
import omni.usd
import usdrt
import warp as wp

import omni.warp.nodes
from omni.warp.nodes.ogn.OgnSamplePrimFlockingDatabase import OgnSamplePrimFlockingDatabase


# device used for flocking simulation; the node entry point scopes every
# evaluation to this device, so Warp launches without an explicit device run here
MAIN_DEVICE = "cuda:0"

# device used for updating colors; the color attributes are selected on this
# device and the `assign_colors` kernel is launched on it
COLOR_DEVICE = "cpu"


#   Kernels
# -----------------------------------------------------------------------------


@wp.struct
class Boid:
    # Per-boid simulation state that is read and written back by the `boids` kernel.

    # current velocity, integrated from the accumulated steering force
    vel: wp.vec3f
    # two angles (fed through sin/cos) that place the wander target on a sphere
    wander_angles: wp.vec2f
    # mass that divides the accumulated force when updating the velocity
    mass: float
    # index of the group the boid belongs to; used to look up `World.biases`
    group: int


@wp.struct
class Obstacle:
    # Spherical obstacle that the `boids` kernel steers around.

    # center of the sphere
    pos: wp.vec3f
    # sphere radius (the `boids` kernel adds a padding of 2.0 before testing against it)
    radius: float


@wp.struct
class World:
    # Global simulation parameters passed by value to the `boids` kernel.

    # lower corner of the box that boundary forces push boids back into
    lower: wp.vec3f
    # upper corner of the box that boundary forces push boids back into
    upper: wp.vec3f
    # id of the hash grid used for neighbor queries
    grid: wp.uint64
    # seed for the per-thread random number generator
    seed: int
    # group-to-group weights indexed as [boid.group, other.group]; positive
    # entries feed cohesion, the others feed decohesion with flipped sign
    biases: wp.mat33f
    # spherical obstacles to avoid
    obstacles: wp.array(dtype=Obstacle)


@wp.kernel(enable_backward=False)
def copy_positions(dst: wp.array(dtype=wp.vec3f), src: wp.fabricarray(dtype=wp.vec3d)):
    # One thread per element: narrow the double-precision source position to
    # single precision and store it at the same index of the destination array.
    i = wp.tid()

    p = src[i]
    x = float(p[0])
    y = float(p[1])
    z = float(p[2])

    dst[i] = wp.vec3f(x, y, z)


@wp.kernel(enable_backward=False)
def assign_colors(
    glows: wp.array(dtype=float),
    groups: wp.array(dtype=int),
    color_ramps: wp.array2d(dtype=wp.vec3f),
    colors: wp.fabricarrayarray(dtype=wp.vec3f)
):
    # One thread per boid: map the glow value onto the four-entry color ramp of
    # the boid's group and write the blended color into the first element of
    # the boid's color array.
    i = wp.tid()

    g = glows[i]
    ramp = groups[i]

    # select the ramp segment [lo, lo + 1] that contains the glow value and the
    # blend factor inside that segment; segments split at 0.4 and 0.8
    lo = int(0)
    t = float(0.0)
    if g < 0.4:
        lo = 0
        t = g / 0.4
    elif g < 0.8:
        lo = 1
        t = (g - 0.4) / 0.4
    else:
        lo = 2
        t = (g - 0.8) / 0.2

    # linear blend between the two ramp entries bounding the segment
    colors[i][0] = (1.0 - t) * color_ramps[ramp, lo] + t * color_ramps[ramp, lo + 1]


@wp.func
def intersect_ray_sphere(origin: wp.vec3f, dir: wp.vec3f, center: wp.vec3f, radius: float):
    # Returns the distance along the ray from `origin` to the first intersection
    # with the sphere, or a negative value when there is nothing to hit ahead:
    #   - the (negative) projection `tc` when the sphere center is behind the origin
    #   - the sentinel -999999.0 when the ray passes the sphere without touching it
    # The projection math expects `dir` to be unit length.

    to_sphere = center - origin

    # signed distance along the ray to the point closest to the sphere center
    tc = wp.dot(to_sphere, dir)

    if tc < 0.0:
        return tc

    # Squared distance between the sphere center and the ray. The test is done on
    # squared values: taking the square root first can never yield a negative
    # number, so a miss would go undetected and produce a NaN below.
    d_sq = wp.length_sq(to_sphere) - tc * tc
    r_sq = radius * radius
    if d_sq > r_sq:
        return -999999.0

    # half-length of the chord the ray cuts through the sphere
    ts = wp.sqrt(r_sq - d_sq)

    return tc - ts


@wp.kernel(enable_backward=False)
def boids(
    boids: wp.array(dtype=Boid),
    world: World,
    dt: float,
    positions: wp.fabricarray(dtype=wp.vec3d),
    orientations: wp.fabricarray(dtype=wp.quatf),
    glows: wp.array(dtype=float),
):
    # Advances one boid per thread by a single time step.
    #
    # Steering forces (obstacle avoidance, wander, separation, cohesion,
    # decohesion, alignment and boundary forces) are accumulated into `force`,
    # integrated into the velocity, and the resulting position, orientation,
    # boid state and glow value are written back in place.
    #
    #   boids:        per-boid state, read and updated in place
    #   world:        bounds, hash grid id, random seed, group biases, obstacles
    #   dt:           time step used for integration
    #   positions:    double-precision positions, read and updated in place
    #   orientations: rotations, read and updated in place
    #   glows:        smoothed neighbor-density value per boid, updated in place
    tid = wp.tid()

    boid = boids[tid]
    
    old_pos = positions[tid]
    old_rot = orientations[tid]

    # work in single precision; the stored position is double precision
    pos = wp.vec3(float(old_pos[0]), float(old_pos[1]), float(old_pos[2]))
    vel = boid.vel

    # heading: the local +X axis rotated by the current orientation
    forward = wp.quat_rotate(old_rot, wp.vec3f(1.0, 0.0, 0.0))

    force = wp.vec3f(0.0)

    # obstacle avoidance
    depenetration_force = 100.0
    avoidance_dist = 20.0
    avoidance_force = 20.0
    obstacles = world.obstacles
    num_obstacles = obstacles.shape[0]
    for i in range(num_obstacles):
        obstacle = obstacles[i]
        to_obstacle = obstacle.pos - pos
        # use padded radius
        radius = obstacle.radius + 2.0
        if wp.length(to_obstacle) < radius:
            # depenetration
            # already inside the padded sphere: push straight away from its center
            force += depenetration_force * wp.normalize(-to_obstacle)
        else:
            # avoidance
            # cast a ray along the heading; only hits ahead and closer than
            # avoidance_dist produce a force, which grows as the hit gets closer
            t = intersect_ray_sphere(pos, forward, obstacle.pos, radius)
            if t > 0.0 and t < avoidance_dist:
                intersection_point = pos + t * forward
                # steer along the direction from the obstacle center to the hit point
                out = intersection_point - obstacle.pos
                force += avoidance_force * (1.0 - t / avoidance_dist) * wp.normalize(out)

    # wander
    # p is a point on a sphere of radius r parameterized by the two wander
    # angles; the sphere is centered `offset` units ahead along the local X axis
    r = 10.0
    s0 = wp.sin(boid.wander_angles[0])
    c0 = wp.cos(boid.wander_angles[0])
    s1 = wp.sin(boid.wander_angles[1])
    c1 = wp.cos(boid.wander_angles[1])
    p = wp.vec3f(r * s0 * s1, r * s0 * c1, r * c0)
    offset = r + 1.0
    target = pos + wp.quat_rotate(old_rot, wp.vec3f(offset, 0.0, 0.0) + p)

    wander_force = 7.0
    force += wander_force * wp.normalize(target - pos)

    # per-thread random state derived from the world seed and the thread index
    state = wp.rand_init(world.seed, tid)

    # randomly perturb both wander angles; the new angles are stored in the
    # local boid copy, which is written back to the array at the end
    angle0 = boid.wander_angles[0] + wp.pi * (0.1 - 0.2 * wp.randf(state))
    angle1 = boid.wander_angles[1] + wp.pi * (0.1 - 0.2 * wp.randf(state))
    boid.wander_angles = wp.vec2f(angle0, angle1)

    cohesion_radius = 15.0
    cohesion_force = 20.0

    separation_radius = 10.0
    separation_force = 100.0

    # flocking
    query = wp.hash_grid_query(world.grid, pos, cohesion_radius)
    num_neighbors = int(0)
    num_align_neighbors = int(0)
    # the cohesion/decohesion "counts" are sums of bias weights, hence floats
    num_cohesion_neighbors = float(0)
    num_decohesion_neighbors = float(0)
    cohesion_pos_sum = wp.vec3f(0.0)
    decohesion_pos_sum = wp.vec3f(0.0)
    vel_sum = wp.vec3f(0.0)
    for index in query:
        if index != tid:
            other = boids[index]
            # NOTE(review): neighbor positions are read from the same array that
            # every thread writes at the end of this kernel and no synchronization
            # is visible here, so a neighbor may be seen before or after its
            # update - confirm this is acceptable for the sample
            other_pos64 = positions[index]
            other_pos = wp.vec3f(float(other_pos64[0]), float(other_pos64[1]), float(other_pos64[2]))
            dist = wp.length(pos - other_pos)

            # explicit distance test on top of the grid query
            if dist < cohesion_radius:
                to_other = wp.normalize(other_pos - pos)
                # separation
                # push away from close neighbors, stronger the closer they are
                if dist < separation_radius:
                    force -= separation_force * (1.0 - dist / separation_radius) * to_other
                # cohesion
                bias = world.biases[boid.group, other.group]
                if bias > 0.0:
                    cohesion_pos_sum += bias * other_pos
                    num_cohesion_neighbors += bias
                else:
                    # non-positive bias: accumulate with flipped sign so the
                    # weights used for the decohesion average are non-negative
                    decohesion_pos_sum -= bias * other_pos
                    num_decohesion_neighbors -= bias
                # alignment
                # only boids of the same group contribute to the average velocity
                if other.group == boid.group:
                    vel_sum += bias * other.vel
                    num_align_neighbors += 1
                num_neighbors += 1

    # align
    # steer towards the average velocity of same-group neighbors
    if num_align_neighbors > 0:
        vel_avg = vel_sum / float(num_align_neighbors)
        force += vel_avg - vel

    # cohere
    # steer towards the bias-weighted average position of attracting neighbors
    if num_cohesion_neighbors > 0.0:
        cohesion_pos_avg = cohesion_pos_sum / float(num_cohesion_neighbors)
        force += cohesion_force * wp.normalize(cohesion_pos_avg - pos)

    # decohere (group separation)
    # steer away from the bias-weighted average position of repelling neighbors
    if num_decohesion_neighbors > 0.0:
        decohesion_pos_avg = decohesion_pos_sum / float(num_decohesion_neighbors)
        force += cohesion_force * wp.normalize(pos - decohesion_pos_avg)

    # boundaries
    # constant force back towards the box on each violated side; the Y axis
    # uses different scales (0.5x at the upper bound, 5x at the lower bound)
    boundary_force = 20.0
    if pos[0] >= world.upper[0]:
        force += wp.vec3f(-boundary_force, 0.0, 0.0)
    if pos[0] <= world.lower[0]:
        force += wp.vec3f(boundary_force, 0.0, 0.0)
    if pos[1] >= world.upper[1]:
        force += wp.vec3f(0.0, -0.5 * boundary_force, 0.0)
    if pos[1] <= world.lower[1]:
        force += wp.vec3f(0.0, 5.0 * boundary_force, 0.0)
    if pos[2] >= world.upper[2]:
        force += wp.vec3f(0.0, 0.0, -boundary_force)
    if pos[2] <= world.lower[2]:
        force += wp.vec3f(0.0, 0.0, boundary_force)

    # integrate acceleration (force / mass) into the velocity
    vel += dt * force / boid.mass

    # clamp speed
    max_speed = 15.0
    speed_sq = wp.length_sq(vel)
    if speed_sq > max_speed * max_speed:
        vel = max_speed * wp.normalize(vel)

    # update position
    # widen back to double precision for storage
    pos += dt * vel
    positions[tid] = wp.vec3d(wp.float64(pos[0]), wp.float64(pos[1]), wp.float64(pos[2]))

    # update orientation
    # apply the rotation that takes the previous heading onto the new velocity
    dq = wp.quat_between_vectors(forward, vel)
    orientations[tid] = wp.normalize(dq * orientations[tid])

    # save velocity
    # writes back the whole local copy, including the updated wander angles
    boid.vel = vel
    boids[tid] = boid

    # update glow as an exponentially weighted moving average to keep it smooth
    # the raw value saturates at 1.0 once 25 neighbors are within cohesion range
    glow = wp.min(1.0, float(num_neighbors) / 25.0)
    glow_alpha = 0.25
    glows[tid] = glow_alpha * glow + (1.0 - glow_alpha) * glows[tid]


#   Internal State
# ------------------------------------------------------------------------------


class InternalState:
    """Internal state for the node.

    Instances start out uninitialized. ``initialize()`` attaches to the USD
    stage through USDRT, tags the boid prims for Fabric selection and creates
    the Warp arrays and bookkeeping values that persist between evaluations.
    """

    def __init__(self) -> None:
        # stays False until `initialize()` has run to completion
        self.initialized = False

    @staticmethod
    def _parse_version(version_string: str) -> tuple:
        """Returns the leading numeric components of a version string.

        Build metadata (after ``+``) and pre-release tags (after ``-``) are
        dropped, and parsing stops at the first component that does not start
        with a digit, so ``"7.4.8-rc.1+abc"`` yields ``(7, 4, 8)`` instead of
        raising on the non-numeric parts.

        Args:
            version_string: Version in dotted form, e.g. ``"7.3.0"``.

        Returns:
            Tuple of ints; empty if the string has no leading number.
        """
        core = version_string.split("+", 1)[0].split("-", 1)[0]

        components = []
        for part in core.split("."):
            # keep only the leading run of ASCII digits of this component
            digits = part[: len(part) - len(part.lstrip("0123456789"))]
            if not digits:
                break
            components.append(int(digits))

        return tuple(components)

    def initialize(self, device):
        """Sets up the stage interop and all simulation data.

        Args:
            device: Warp device on which the flocking simulation arrays and
                the hash grid are allocated.

        Raises:
            RuntimeError: If the ``usdrt.scenegraph`` extension is not enabled
                or its version is older than 7.3.0.
            ValueError: If ``device`` and ``COLOR_DEVICE`` are two different
                non-CPU devices.
        """

        # requirement checks
        ext_mgr = omni.kit.app.get_app().get_extension_manager()

        # make sure USDRT is enabled
        usdrt_ext_name = "usdrt.scenegraph"
        if not ext_mgr.is_extension_enabled(usdrt_ext_name):
            raise RuntimeError(f"This sample requires the '{usdrt_ext_name}' extension to be enabled")

        # check USDRT version to make sure we have a working SelectPrims()
        usdrt_ext_id = ext_mgr.get_enabled_extension_id(usdrt_ext_name)
        usdrt_version_string = ext_mgr.get_extension_dict(usdrt_ext_id)["package"]["version"]
        # tolerant parsing: a pre-release/build suffix must not break the check
        usdrt_version = self._parse_version(usdrt_version_string)
        if usdrt_version < (7, 3, 0):
            raise RuntimeError(f"USDRT version 7.3.0 is required, found {usdrt_version_string}.  Please update to a newer version of Kit to run this sample.")

        # check if FSD is enabled
        settings = carb.settings.get_settings()
        is_fsd_enabled = settings.get_as_bool("/app/useFabricSceneDelegate")
        if not is_fsd_enabled:
            print("***")
            print("*** Flocking demo warning: The Fabric Scene Delegate is not enabled.")
            print("*** Some features, like color animation, may not work.")
            print("*** You can enable FSD in Preferences->Rendering.")
            print("***")

        stage_id = omni.usd.get_context().get_stage_id()

        usdrt_stage = usdrt.Usd.Stage.Attach(stage_id)

        # import to Fabric (the traversal itself is the point, prims are not used)
        for _ in usdrt_stage.Traverse():
            pass

        # set up for Fabric interop
        boid_root = usdrt_stage.GetPrimAtPath(usdrt.Sdf.Path("/World/Boids"))
        boid_prims = boid_root.GetChildren()
        for prim in boid_prims:
            # seed the world position from the authored translation and start
            # with an identity orientation
            pos = prim.GetAttribute("xformOp:translate").Get()
            prim.CreateAttribute("_worldPosition", usdrt.Sdf.ValueTypeNames.Double3, True).Set(pos)
            prim.CreateAttribute("_worldOrientation", usdrt.Sdf.ValueTypeNames.Quatf, True).Set(usdrt.Gf.Quatf(1, 0, 0, 0))

            # create a custom tag for the boids (could use applied schema too)
            prim.CreateAttribute("BoidTag", usdrt.Sdf.ValueTypeNames.AppliedSchemaTypeTag, True)

        num_boids = len(boid_prims)

        self.stage = usdrt_stage

        # selection criteria reused on every evaluation
        self.require_schemas = ["BoidTag"]

        self.transform_attrs = [
            (usdrt.Sdf.ValueTypeNames.Double3, "_worldPosition", usdrt.Usd.Access.ReadWrite),
            (usdrt.Sdf.ValueTypeNames.Quatf, "_worldOrientation", usdrt.Usd.Access.ReadWrite),
        ]

        self.color_attrs = [
            (usdrt.Sdf.ValueTypeNames.Float3Array, "primvars:_emissive", usdrt.Usd.Access.ReadWrite),
        ]

        npboids = np.zeros(num_boids, dtype=Boid.numpy_dtype())

        # random initial velocity of magnitude 20 in the XZ plane
        angles = math.pi - 2 * math.pi * np.random.rand(num_boids)
        vx = 20 * np.sin(angles)
        vz = 20 * np.cos(angles)
        npboids["vel"][:, 0] = vx
        npboids["vel"][:, 2] = vz

        npboids["wander_angles"][:, 0] = math.pi * np.random.rand(num_boids)
        npboids["wander_angles"][:, 1] = 2 * math.pi * np.random.rand(num_boids)

        min_mass = 1.0
        max_mass = 2.0
        npboids["mass"][:] = min_mass + (max_mass - min_mass) * np.random.rand(num_boids)

        # we can have up to 3 groups currently, but that can be easily extended
        self.num_groups = 2
        npboids["group"] = np.random.randint(self.num_groups, size=num_boids)

        num_obstacles = 3
        npobstacles = np.zeros(num_obstacles, dtype=Obstacle.numpy_dtype())
        npobstacles["pos"][0] = (-20, 30, -40)
        npobstacles["radius"][0] = 40
        npobstacles["pos"][1] = (90, 30, 30)
        npobstacles["radius"][1] = 30
        npobstacles["pos"][2] = (-100, 30, 60)
        npobstacles["radius"][2] = 25

        self.grid = wp.HashGrid(dim_x=32, dim_y=32, dim_z=32, device=device)

        # every entry starts at -1.0; each active group gets 1.0 towards itself
        biases = wp.mat33f(-1.0)
        for i in range(self.num_groups):
            biases[i, i] = 1.0

        world = World()
        world.lower = (-120, 20, -90)
        world.upper = (120, 40, 90)
        world.grid = self.grid.id
        world.seed = 0
        world.biases = biases
        world.obstacles = wp.array(npobstacles, dtype=Obstacle, device=device)
        self.world = world

        self.num_boids = num_boids
        self.boids = wp.array(npboids, dtype=Boid, device=device)

        # color ramps per group
        color_ramps = [
            [[0.3, 0.0, 0.0], [1.0, 0.0, 0.0], [1.0, 0.5, 0.0], [1.0, 1.0, 0.5]],
            [[0.0, 0.0, 0.3], [0.0, 0.0, 1.0], [0.0, 0.5, 1.0], [0.5, 1.0, 1.0]],
            [[0.0, 0.3, 0.0], [0.0, 1.0, 0.0], [0.0, 1.0, 0.5], [0.8, 1.0, 0.8]],
        ]

        # copy of positions used for updating the spatial grid
        self.positions = wp.zeros(num_boids, dtype=wp.vec3f, device=device)

        # color ramps are only used on the COLOR_DEVICE
        self.color_ramps_c = wp.array(color_ramps, dtype=wp.vec3f, device=COLOR_DEVICE)

        # keep a copy of group assignments on the COLOR_DEVICE
        self.groups_c = wp.array(npboids["group"], device=COLOR_DEVICE)

        # if we use different devices, the glow array must be copied on each update
        # (glows_m lives on the main device, glows_c on the COLOR_DEVICE)
        if COLOR_DEVICE == device:
            # use the same glow array on each device, no copying needed
            self.glows_c = wp.zeros(num_boids, dtype=float, device=device)
            self.glows_m = self.glows_c
        elif COLOR_DEVICE == "cpu" or device == "cpu":
            # use a pinned host array for async copying glows between devices
            glows_h = wp.zeros(num_boids, dtype=float, device="cpu", pinned=True)
            if COLOR_DEVICE == "cpu":
                self.glows_c = glows_h
                self.glows_m = wp.zeros_like(glows_h, device=device)
            else:
                self.glows_c = wp.zeros_like(glows_h, device=COLOR_DEVICE)
                self.glows_m = glows_h
        else:
            # two different CUDA devices would each need their own glow array,
            # but that's currently not supported in Kit; fail before allocating
            raise ValueError("Multiple GPUs not supported yet")

        self.time = 0.0

        # the group biases are re-rolled at random intervals within these bounds
        self.min_group_think = 3.0
        self.max_group_think = 10.0
        self.next_group_think = self.min_group_think + (self.max_group_think - self.min_group_think) * np.random.rand()

        self.frameno = 0

        self.initialized = True


#   Compute
# ------------------------------------------------------------------------------


def compute(db: OgnSamplePrimFlockingDatabase) -> None:
    """Evaluates the node.

    Runs one fixed-size simulation step on the current Warp device: rebuilds
    the hash grid from the boid positions, launches the flocking kernel,
    transfers the glow values to the color device when it differs from the
    simulation device, occasionally re-rolls the biases between two groups and
    finally updates the boid colors.
    """

    state = db.internal_state
    device = wp.get_device()

    # the state is built lazily on the first evaluation
    if not state.initialized:
        state.initialize(device)

    state.frameno += 1

    # select the tagged prims together with their transform attributes
    xform_prims = state.stage.SelectPrims(
        require_applied_schemas=state.require_schemas,
        require_attrs=state.transform_attrs,
        device=str(device),
    )
    positions_f = wp.fabricarray(data=xform_prims, attrib="_worldPosition")
    orientations_f = wp.fabricarray(data=xform_prims, attrib="_worldOrientation")

    # a fixed time step keeps the simulation stable
    dt = 1.0 / 60.0
    state.time += dt

    # the spatial grid needs a contiguous vec3f array, so convert the positions first
    wp.launch(
        copy_positions,
        dim=state.num_boids,
        inputs=[state.positions, positions_f],
    )

    # the cell radius is chosen a bit larger than the query radius
    cell_radius = 20.0
    state.grid.build(state.positions, cell_radius)

    # seed the kernel's random numbers with the frame number
    state.world.seed = state.frameno

    # advance the flock by one step
    wp.launch(
        boids,
        dim=state.num_boids,
        inputs=[state.boids, state.world, dt, positions_f, orientations_f, state.glows_m],
    )

    # when colors live on another device, start an async copy of the glow
    # values and keep the stream around so it can be synchronized later
    work_stream = None
    if COLOR_DEVICE != device:
        work_stream = device.stream if device.is_cuda else wp.get_stream(COLOR_DEVICE)
        wp.copy(state.glows_c, state.glows_m, stream=work_stream)

    # select the tagged prims together with their color attributes
    color_prims = state.stage.SelectPrims(
        require_applied_schemas=state.require_schemas,
        require_attrs=state.color_attrs,
        device=COLOR_DEVICE,
    )
    colors_f = wp.fabricarray(data=color_prims, attrib="primvars:_emissive")

    # every now and then re-roll how two groups feel about each other
    think_is_due = state.time >= state.next_group_think
    if state.num_groups > 1 and think_is_due:

        # draw two distinct groups at random
        first = np.random.randint(state.num_groups)
        second = np.random.randint(state.num_groups)
        while first == second:
            second = np.random.randint(state.num_groups)

        # the new biases are mostly negative (the two groups repel each other)
        # but can also come out positive (attraction)
        state.world.biases[first, second] = 1.0 - 5.0 * np.random.rand()
        state.world.biases[second, first] = 1.0 - 5.0 * np.random.rand()

        # schedule the next re-roll a random interval after the current deadline
        think_span = state.max_group_think - state.min_group_think
        state.next_group_think += state.min_group_think + think_span * np.random.rand()

    # make sure the async glow copy has finished before the colors read it
    if work_stream is not None:
        wp.synchronize_stream(work_stream)

    # recolor the boids from their glow values on the color device
    wp.launch(
        assign_colors,
        dim=state.num_boids,
        inputs=[state.glows_c, state.groups_c, state.color_ramps_c, colors_f],
        device=COLOR_DEVICE,
    )


#   Node Entry Point
# ------------------------------------------------------------------------------

class OgnSamplePrimFlocking:
    """Node entry point for the prim flocking sample."""

    @staticmethod
    def internal_state() -> InternalState:
        """Creates a fresh, uninitialized state object for the node."""
        return InternalState()

    @staticmethod
    def compute(db: OgnSamplePrimFlockingDatabase) -> None:
        """Evaluates the node with MAIN_DEVICE as the scoped Warp device.

        Any exception raised during evaluation is logged on the node with its
        traceback and the execution output is left untouched; otherwise the
        downstream nodes are triggered.
        """
        main_device = wp.get_device(MAIN_DEVICE)

        try:
            with wp.ScopedDevice(main_device):
                compute(db)
        except Exception:
            db.log_error(traceback.format_exc())
        else:
            # Fire the execution for the downstream nodes.
            db.outputs.execOut = og.ExecutionAttributeState.ENABLED