{
  "omni_relay": {
    "status": "selected_relay_in_progress",
    "dataset": "ropedia-ai/xperience-10m",
    "staging": "accelerated_chunked_parallel_transfer_with_batch_prefetch",
    "training_target": "external_multi_gpu_training_host",
    "selection_strategy": "stratified_round_robin_by_top_level_session",
    "target_episodes": 128,
    "selected_sessions": 128,
    "candidate_scan_top_level_sessions": 802,
    "valid_candidates": 12102,
    "estimated_bytes": 298188841943,
    "exclude": [
      "visualization.rrd"
    ],
    "access_status": "Full-dataset access is granted; selected multi-episode relay is in progress with chunked parallel transfer and overlapping batch prefetch.",
    "current_scope": "The selected-episode Qwen3-Omni fine-tune requires completed data staging and held-out evaluation; the 32-episode Qwen3-Omni fine-tune requires gated data staging before any real held-out metric is reported."
  },
  "models": {
    "motion_action": {
      "accuracy": 0.9828178694158075,
      "balanced_accuracy": 0.9643518518518519,
      "macro_f1": 0.96884342657456,
      "weighted_f1": 0.9824311468352843,
      "num_eval_windows": 291,
      "num_classes": 18,
      "majority_baseline_accuracy": 0.13745704467353953,
      "train_final_accuracy": 1.0,
      "train_final_loss": 0.019042566418647766
    },
    "motion_subtask": {
      "accuracy": 0.9758620689655172,
      "balanced_accuracy": 0.9783924095954172,
      "macro_f1": 0.9528048001232955,
      "weighted_f1": 0.9778836359351952,
      "num_eval_windows": 290,
      "num_classes": 14,
      "majority_baseline_accuracy": 0.14482758620689656,
      "train_final_accuracy": 1.0,
      "train_final_loss": 0.02664567530155182
    },
    "all_modalities_action": {
      "accuracy": 0.9862542955326461,
      "balanced_accuracy": 0.9856481481481482,
      "macro_f1": 0.9828810433408773,
      "weighted_f1": 0.9862660597416385,
      "num_eval_windows": 291,
      "num_classes": 18,
      "majority_baseline_accuracy": 0.13745704467353953,
      "train_final_accuracy": 1.0,
      "train_final_loss": 0.014677195809781551,
      "feature_dim": 8546,
      "num_windows": 1144
    },
    "all_modalities_subtask": {
      "accuracy": 0.9827586206896551,
      "balanced_accuracy": 0.9505102040816327,
      "macro_f1": 0.9173189771658273,
      "weighted_f1": 0.9841228382209077,
      "num_eval_windows": 290,
      "num_classes": 14,
      "majority_baseline_accuracy": 0.14482758620689656,
      "train_final_accuracy": 1.0,
      "train_final_loss": 0.012834250926971436,
      "feature_dim": 8546,
      "num_windows": 1147
    }
  },
  "suite": {
    "annotation": "data/sample/xperience-10m-sample/annotation.hdf5",
    "num_frames": 5821,
    "num_windows": 1161,
    "feature_dim": 8546,
    "window_frames": 20,
    "stride_frames": 5,
    "tasks": {
      "timeline_action": {
        "accuracy": 0.029154518950437316,
        "balanced_accuracy": 0.03125,
        "macro_f1": 0.05,
        "weighted_f1": 0.04664723032069971,
        "num_eval_windows": 343,
        "num_classes": 18,
        "task": "timeline_action",
        "input": "all modalities -> current action label",
        "split": "chronological",
        "num_windows": 1144,
        "num_train_windows": 801,
        "num_test_windows": 343,
        "feature_dim": 8546,
        "majority_baseline_accuracy": 0.0,
        "train_final_accuracy": 1.0,
        "train_final_loss": 0.016824405640363693,
        "unseen_test_classes": [
          "Place item on table",
          "Pour coffee",
          "Pour milk into coffee",
          "Wait/Prepare for pouring"
        ]
      },
      "timeline_subtask": {
        "accuracy": 0.05813953488372093,
        "balanced_accuracy": 0.05376979652090881,
        "macro_f1": 0.05056355513846935,
        "weighted_f1": 0.06827161211620246,
        "num_eval_windows": 344,
        "num_classes": 14,
        "task": "timeline_subtask",
        "input": "all modalities -> current subtask label",
        "split": "chronological",
        "num_windows": 1147,
        "num_train_windows": 803,
        "num_test_windows": 344,
        "feature_dim": 8546,
        "majority_baseline_accuracy": 0.0,
        "train_final_accuracy": 1.0,
        "train_final_loss": 0.014138756319880486,
        "unseen_test_classes": [
          "Move bottle to coffee equipment",
          "Pour coffee",
          "Pour milk into coffee",
          "Prepare for pouring"
        ]
      },
      "transition_detection": {
        "accuracy": 0.9080459770114943,
        "balanced_accuracy": 0.6543674698795181,
        "macro_f1": 0.6118237590630229,
        "weighted_f1": 0.9197389592989339,
        "num_eval_windows": 348,
        "num_classes": 2,
        "task": "transition_detection",
        "input": "all modalities -> action boundary/steady",
        "split": "chronological",
        "num_windows": 1161,
        "num_train_windows": 813,
        "num_test_windows": 348,
        "feature_dim": 8546,
        "majority_baseline_accuracy": 0.9540229885057471,
        "train_final_accuracy": 1.0,
        "train_final_loss": 0.007154403254389763,
        "unseen_test_classes": [],
        "boundary_precision": 0.07142857142857142,
        "boundary_recall": 0.5,
        "boundary_f1": 0.125,
        "matched_boundaries": 2,
        "true_boundaries": 4,
        "predicted_boundaries": 28,
        "mean_abs_timing_error_frames": 3.5
      },
      "next_action": {
        "accuracy": 0.034482758620689655,
        "balanced_accuracy": 0.04,
        "macro_f1": 0.05925925925925927,
        "weighted_f1": 0.05108556832694764,
        "num_eval_windows": 348,
        "num_classes": 18,
        "task": "next_action",
        "input": "all modalities at t -> action at t+20 frames",
        "split": "chronological",
        "num_windows": 1161,
        "num_train_windows": 813,
        "num_test_windows": 348,
        "feature_dim": 8546,
        "majority_baseline_accuracy": 0.0,
        "train_final_accuracy": 1.0,
        "train_final_loss": 0.01754833571612835,
        "unseen_test_classes": [
          "Place item on table",
          "Pour coffee",
          "Pour milk into coffee",
          "Wait/Prepare for pouring"
        ]
      },
      "hand_trajectory_forecast": {
        "mse": 14.956222534179688,
        "mae": 0.420173317193985,
        "r2": -1763.3831383277447,
        "task": "hand_trajectory_forecast",
        "input": "all modalities at t -> future left/right hand 3D joints",
        "split": "chronological",
        "num_windows": 1159,
        "num_train_windows": 811,
        "num_test_windows": 348,
        "forecast_frames": 10,
        "mpjpe": 0.8646570444107056,
        "final_frame_mpjpe": 1.0330793857574463,
        "target_dim": 1260
      },
      "contact_prediction": {
        "accuracy": 1.0,
        "balanced_accuracy": 1.0,
        "macro_f1": 1.0,
        "weighted_f1": 1.0,
        "num_eval_windows": 348,
        "num_classes": 1,
        "task": "contact_prediction",
        "input": "all non-contact/non-caption-label modalities -> any body contact",
        "split": "chronological",
        "num_windows": 1161,
        "num_train_windows": 813,
        "num_test_windows": 348,
        "feature_dim": 7503,
        "majority_baseline_accuracy": 1.0,
        "train_final_accuracy": 1.0,
        "train_final_loss": 0.0006056802230887115,
        "unseen_test_classes": []
      },
      "object_relevance": {
        "micro_f1": 0.18034382095361662,
        "macro_f1": 0.06329638076675959,
        "exact_match": 0.005747126436781609,
        "precision": 0.16106604866743918,
        "recall": 0.20486366985998525,
        "task": "object_relevance",
        "input": "all non-caption modalities -> current relevant object set",
        "split": "chronological",
        "num_windows": 1161,
        "num_train_windows": 813,
        "num_test_windows": 348,
        "num_objects": 34
      },
      "caption_grounding": {
        "mrr": 0.016023479050338015,
        "median_rank": 172.0,
        "mean_rank": 174.67816091954023,
        "num_queries": 348,
        "top1_accuracy": 0.0028735632183908046,
        "top5_accuracy": 0.011494252873563218,
        "top10_accuracy": 0.014367816091954023,
        "task": "caption_grounding",
        "input": "caption objects/interaction text query + candidate sensor windows",
        "output": "matching time window",
        "split": "chronological",
        "num_train_windows": 813,
        "num_test_windows": 348
      },
      "cross_modal_retrieval": {
        "mrr": 0.26925966892956127,
        "median_rank": 14.0,
        "mean_rank": 43.34770114942529,
        "num_queries": 348,
        "top1_accuracy": 0.16379310344827586,
        "top5_accuracy": 0.367816091954023,
        "top10_accuracy": 0.47126436781609193,
        "task": "cross_modal_retrieval",
        "input": "motion/IMU/camera/audio query",
        "output": "matching depth/video window",
        "split": "chronological",
        "num_train_windows": 813,
        "num_test_windows": 348
      },
      "modality_reconstruction": {
        "mse": 1358.1593017578125,
        "mae": 0.29572129249572754,
        "r2": -0.015271898913936655,
        "task": "modality_reconstruction",
        "input": "motion/IMU/camera/audio",
        "output": "depth/video feature vector",
        "split": "chronological",
        "num_train_windows": 813,
        "num_test_windows": 348,
        "target_dim": 5096
      },
      "temporal_order": {
        "accuracy": 0.4540229885057471,
        "precision": 0.4665271966527197,
        "recall": 0.6408045977011494,
        "f1": 0.5399515738498789,
        "tp": 223,
        "tn": 93,
        "fp": 255,
        "fn": 125,
        "positive_rate_true": 0.5,
        "positive_rate_pred": 0.6867816091954023,
        "task": "temporal_order",
        "input": "two adjacent windows -> whether order is correct",
        "split": "chronological",
        "num_samples": 2320,
        "num_train_samples": 1624,
        "num_test_samples": 696,
        "train_final_accuracy": 0.5086206896551724
      },
      "misalignment_detection": {
        "accuracy": 0.5158959537572254,
        "precision": 0.5166163141993958,
        "recall": 0.49421965317919075,
        "f1": 0.5051698670605613,
        "tp": 171,
        "tn": 186,
        "fp": 160,
        "fn": 175,
        "positive_rate_true": 0.5,
        "positive_rate_pred": 0.47832369942196534,
        "task": "misalignment_detection",
        "input": "motion+visual/audio pair -> aligned vs shifted by 8 windows",
        "split": "chronological",
        "num_samples": 2306,
        "num_train_samples": 1614,
        "num_test_samples": 692,
        "train_final_accuracy": 0.49380421313506817
      }
    },
    "neural_model": {
      "name": "neural_mlp",
      "type": "lightweight PyTorch MLP over shared window features",
      "epochs": 80,
      "hidden_dim": 128,
      "batch_size": 128,
      "learning_rate": 0.001,
      "weight_decay": 0.0001,
      "dropout": 0.1,
      "device": "auto"
    },
    "neural_tasks": {
      "timeline_action": {
        "accuracy": 0.008746355685131196,
        "balanced_accuracy": 0.009375,
        "macro_f1": 0.014814814814814814,
        "weighted_f1": 0.013821401576503616,
        "num_eval_windows": 343,
        "num_classes": 18,
        "task": "timeline_action",
        "input": "all modalities -> current action label",
        "split": "chronological",
        "num_windows": 1144,
        "num_train_windows": 801,
        "num_test_windows": 343,
        "feature_dim": 8546,
        "majority_baseline_accuracy": 0.0,
        "unseen_test_classes": [
          "Place item on table",
          "Pour coffee",
          "Pour milk into coffee",
          "Wait/Prepare for pouring"
        ],
        "model": "neural_mlp",
        "head": "z-score -> MLP softmax",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.04246756529782,
        "train_final_accuracy": 0.9875156054931336
      },
      "timeline_subtask": {
        "accuracy": 0.0377906976744186,
        "balanced_accuracy": 0.045614035087719294,
        "macro_f1": 0.02810810810810811,
        "weighted_f1": 0.023287240729101197,
        "num_eval_windows": 344,
        "num_classes": 14,
        "task": "timeline_subtask",
        "input": "all modalities -> current subtask label",
        "split": "chronological",
        "num_windows": 1147,
        "num_train_windows": 803,
        "num_test_windows": 344,
        "feature_dim": 8546,
        "majority_baseline_accuracy": 0.0,
        "unseen_test_classes": [
          "Move bottle to coffee equipment",
          "Pour coffee",
          "Pour milk into coffee",
          "Prepare for pouring"
        ],
        "model": "neural_mlp",
        "head": "z-score -> MLP softmax",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 5.4104819144748596e-05,
        "train_final_accuracy": 1.0
      },
      "transition_detection": {
        "accuracy": 0.8735632183908046,
        "balanced_accuracy": 0.666039156626506,
        "macro_f1": 0.5862068965517241,
        "weighted_f1": 0.8993261989694807,
        "num_eval_windows": 348,
        "num_classes": 2,
        "task": "transition_detection",
        "input": "all modalities -> action boundary/steady",
        "split": "chronological",
        "num_windows": 1161,
        "num_train_windows": 813,
        "num_test_windows": 348,
        "feature_dim": 8546,
        "majority_baseline_accuracy": 0.9540229885057471,
        "unseen_test_classes": [],
        "model": "neural_mlp",
        "head": "z-score -> MLP softmax",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.029138497962572854,
        "train_final_accuracy": 0.990159901599016,
        "boundary_precision": 0.07142857142857142,
        "boundary_recall": 0.75,
        "boundary_f1": 0.13043478260869565,
        "matched_boundaries": 3,
        "true_boundaries": 4,
        "predicted_boundaries": 42,
        "mean_abs_timing_error_frames": 2.6666666666666665
      },
      "next_action": {
        "accuracy": 0.02586206896551724,
        "balanced_accuracy": 0.03,
        "macro_f1": 0.04186046511627907,
        "weighted_f1": 0.03608660785886127,
        "num_eval_windows": 348,
        "num_classes": 18,
        "task": "next_action",
        "input": "all modalities at t -> action at t+20 frames",
        "split": "chronological",
        "num_windows": 1161,
        "num_train_windows": 813,
        "num_test_windows": 348,
        "feature_dim": 8546,
        "majority_baseline_accuracy": 0.0,
        "unseen_test_classes": [
          "Place item on table",
          "Pour coffee",
          "Pour milk into coffee",
          "Wait/Prepare for pouring"
        ],
        "model": "neural_mlp",
        "head": "z-score -> MLP softmax",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.000416612956025105,
        "train_final_accuracy": 1.0
      },
      "hand_trajectory_forecast": {
        "mse": 0.004775360692292452,
        "mae": 0.05433763191103935,
        "r2": 0.43665148265771614,
        "task": "hand_trajectory_forecast",
        "input": "all modalities at t -> future left/right hand 3D joints",
        "split": "chronological",
        "num_windows": 1159,
        "num_train_windows": 811,
        "num_test_windows": 348,
        "forecast_frames": 10,
        "mpjpe": 0.10785018652677536,
        "final_frame_mpjpe": 0.11407545953989029,
        "target_dim": 1260,
        "model": "neural_mlp",
        "head": "z-score -> MLP regression",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.055699273420247435
      },
      "contact_prediction": {
        "accuracy": 1.0,
        "balanced_accuracy": 1.0,
        "macro_f1": 1.0,
        "weighted_f1": 1.0,
        "num_eval_windows": 348,
        "num_classes": 1,
        "task": "contact_prediction",
        "input": "all non-contact/non-caption-label modalities -> any body contact",
        "split": "chronological",
        "num_windows": 1161,
        "num_train_windows": 813,
        "num_test_windows": 348,
        "feature_dim": 7503,
        "majority_baseline_accuracy": 1.0,
        "unseen_test_classes": [],
        "model": "neural_mlp",
        "head": "z-score -> MLP softmax",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.0,
        "train_final_accuracy": 1.0
      },
      "object_relevance": {
        "micro_f1": 0.1679279279279279,
        "macro_f1": 0.048883162556964774,
        "exact_match": 0.014367816091954023,
        "precision": 0.16431593794076163,
        "recall": 0.17170228445099484,
        "task": "object_relevance",
        "input": "all non-caption modalities -> current relevant object set",
        "split": "chronological",
        "num_windows": 1161,
        "num_train_windows": 813,
        "num_test_windows": 348,
        "num_objects": 34,
        "feature_dim": 7650,
        "model": "neural_mlp",
        "head": "z-score -> MLP sigmoid multilabel",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.003651880362182214
      },
      "caption_grounding": {
        "mrr": 0.01684125567132316,
        "median_rank": 180.5,
        "mean_rank": 178.382183908046,
        "num_queries": 348,
        "top1_accuracy": 0.0028735632183908046,
        "top5_accuracy": 0.014367816091954023,
        "top10_accuracy": 0.020114942528735632,
        "task": "caption_grounding",
        "input": "caption objects/interaction text query + candidate sensor windows",
        "split": "chronological",
        "num_train_windows": 813,
        "num_test_windows": 348,
        "target_dim": 896,
        "output": "matching time window",
        "model": "neural_mlp",
        "head": "z-score -> MLP projection/regression",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.06317874967483723
      },
      "cross_modal_retrieval": {
        "mrr": 0.1299971898648288,
        "median_rank": 40.0,
        "mean_rank": 66.60057471264368,
        "num_queries": 348,
        "top1_accuracy": 0.05172413793103448,
        "top5_accuracy": 0.19827586206896552,
        "top10_accuracy": 0.2413793103448276,
        "task": "cross_modal_retrieval",
        "input": "motion/IMU/camera/audio query",
        "split": "chronological",
        "num_train_windows": 813,
        "num_test_windows": 348,
        "target_dim": 5096,
        "output": "matching depth/video window",
        "model": "neural_mlp",
        "head": "z-score -> MLP projection/regression",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.21891545446596464
      },
      "modality_reconstruction": {
        "mse": 1351.3363037109375,
        "mae": 0.10379635542631149,
        "r2": -0.010171410134180991,
        "task": "modality_reconstruction",
        "input": "motion/IMU/camera/audio",
        "split": "chronological",
        "num_train_windows": 813,
        "num_test_windows": 348,
        "target_dim": 5096,
        "output": "depth/video feature vector",
        "model": "neural_mlp",
        "head": "z-score -> MLP projection/regression",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.21891545446596464
      },
      "temporal_order": {
        "accuracy": 0.8577586206896551,
        "precision": 0.8878504672897196,
        "recall": 0.8189655172413793,
        "f1": 0.8520179372197308,
        "tp": 285,
        "tn": 312,
        "fp": 36,
        "fn": 63,
        "positive_rate_true": 0.5,
        "positive_rate_pred": 0.46120689655172414,
        "task": "temporal_order",
        "input": "two adjacent windows -> whether order is correct",
        "split": "chronological",
        "num_samples": 2320,
        "num_train_samples": 1624,
        "num_test_samples": 696,
        "feature_dim": 25638,
        "model": "neural_mlp",
        "head": "z-score -> MLP binary softmax",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.0005108328477586757,
        "train_final_accuracy": 1.0
      },
      "misalignment_detection": {
        "accuracy": 0.7008670520231214,
        "precision": 0.6824146981627297,
        "recall": 0.7514450867052023,
        "f1": 0.7152682255845944,
        "tp": 260,
        "tn": 225,
        "fp": 121,
        "fn": 86,
        "positive_rate_true": 0.5,
        "positive_rate_pred": 0.5505780346820809,
        "task": "misalignment_detection",
        "input": "motion+visual/audio pair -> aligned vs shifted by 8 windows",
        "split": "chronological",
        "num_samples": 2306,
        "num_train_samples": 1614,
        "num_test_samples": 692,
        "feature_dim": 7511,
        "model": "neural_mlp",
        "head": "z-score -> MLP binary softmax",
        "neural_epochs": 80,
        "neural_hidden_dim": 128,
        "neural_batch_size": 128,
        "neural_learning_rate": 0.001,
        "neural_weight_decay": 0.0001,
        "neural_dropout": 0.1,
        "neural_device": "cpu",
        "train_final_loss": 0.010604870708167664,
        "train_final_accuracy": 0.9956629491945477
      }
    }
  },
  "feature_manifest": [
    {
      "name": "hand left joints",
      "start": 0,
      "end": 441,
      "dim": 441
    },
    {
      "name": "hand right joints",
      "start": 441,
      "end": 882,
      "dim": 441
    },
    {
      "name": "body joints",
      "start": 882,
      "end": 1974,
      "dim": 1092
    },
    {
      "name": "body contacts",
      "start": 1974,
      "end": 2121,
      "dim": 147
    },
    {
      "name": "camera translation",
      "start": 2121,
      "end": 2142,
      "dim": 21
    },
    {
      "name": "camera rotation matrix",
      "start": 2142,
      "end": 2205,
      "dim": 63
    },
    {
      "name": "imu accel gyro",
      "start": 2205,
      "end": 2247,
      "dim": 42
    },
    {
      "name": "depth confidence",
      "start": 2247,
      "end": 3227,
      "dim": 980
    },
    {
      "name": "video fisheye cam0",
      "start": 3227,
      "end": 3913,
      "dim": 686
    },
    {
      "name": "video fisheye cam1",
      "start": 3913,
      "end": 4599,
      "dim": 686
    },
    {
      "name": "video fisheye cam2",
      "start": 4599,
      "end": 5285,
      "dim": 686
    },
    {
      "name": "video fisheye cam3",
      "start": 5285,
      "end": 5971,
      "dim": 686
    },
    {
      "name": "video stereo left",
      "start": 5971,
      "end": 6657,
      "dim": 686
    },
    {
      "name": "video stereo right",
      "start": 6657,
      "end": 7343,
      "dim": 686
    },
    {
      "name": "audio",
      "start": 7343,
      "end": 7511,
      "dim": 168
    },
    {
      "name": "language text",
      "start": 7511,
      "end": 8407,
      "dim": 896
    },
    {
      "name": "slam point cloud",
      "start": 8407,
      "end": 8429,
      "dim": 22
    },
    {
      "name": "calibration",
      "start": 8429,
      "end": 8546,
      "dim": 117
    }
  ]
}