| # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| # SPDX-License-Identifier: Apache-2.0 | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
from enum import Enum, unique
@unique
class DataField(str, Enum):
    """String-valued identifiers for the data fields listed below.

    The ``str`` mixin makes every member a real ``str`` instance, so a member
    compares equal to its raw value (``DataField.IMAGE_RGB == "image_rgb"``)
    and a member can be looked up from that value
    (``DataField("image_rgb") is DataField.IMAGE_RGB``).

    ``@unique`` makes class creation fail if two members are ever given the
    same value; without it the second one would silently become an alias of
    the first.

    The comment above each member records the layout expected for the data
    stored under that field.
    """

    # [B, C, H, W], float32, RGB image with values ranging from 0 to 1.
    IMAGE_RGB = "image_rgb"

    # [B, 4, 4], float32, camera-to-world transformation matrix.
    CAMERA_C2W_TRANSFORM = "camera_c2w_transform"

    # [B, 4], float32, OpenCV pinhole intrinsics stored as [fx, fy, cx, cy].
    CAMERA_INTRINSICS = "camera_intrinsics"

    # List of captions, of size B.
    CAPTION = "caption"

    # [B, H, W], float32, depth map in metric scale.
    METRIC_DEPTH = "metric_depth"

    # [B, H, W], uint8, instance mask (0 is background).
    DYNAMIC_INSTANCE_MASK = "dynamic_instance_mask"

    # [B, H, W], float32, backward flow from this frame to the previous frame.
    # NOTE(review): the documented shape has no axis for the flow components;
    # confirm whether a channel dimension is missing from this description.
    BACKWARD_FLOW = "backward_flow"

    # [B, H, W, 3], float32, ray direction (assumes no motion / rolling shutter).
    RAY_DIRECTION = "ray_direction"

    # TODO: document the layout of this field.
    OBJECT_BBOX = "object_bbox"

    # A list of float32 point clouds.
    # TODO: document the layout of each point cloud.
    POINT_CLOUD = "point_cloud"

    # [B, N, (3 + 3x3)], N future positions. In the last dimension the first
    # 3 entries are the xyz location and the last 9 are the 3x3 rotation.
    # B corresponds to the number of timestamps for the base camera type.
    TRAJECTORY = "trajectory"

    # [V,], dictionary of metadata.
    META_DATA = "meta_data"

    # [V, N, C], float32; N is variable across different V.
    LANGUAGE_EMBEDDING = "language_embedding"

    # [B, C, T, H, W], float32, latent image.
    LATENT_RGB = "latent_rgb"