{
  "data": {
    "augmentations": {
      "affine_p": 0.0,
      "blur_p": 0.1,
      "cut_p": 0.0,
      "flip_p": 0.5,
      "flipt_p": 0.3,
      "gamma_p": 0.5,
      "grayscale_p": 0.1,
      "invert_p": 0.0,
      "jitter_p": 0.5,
      "noise_pad": 1.0,
      "only_zoom": true,
      "random_blur": 2.0,
      "random_gamma": 0.2,
      "random_jitter": 0.1,
      "random_rotation": 0.0,
      "random_scale": 2.0,
      "random_shear": 0.0,
      "random_translate_x": 0.04,
      "random_translate_y": 0.01,
      "rotation_p": 0.0,
      "scale_p": 0.0,
      "shape_constraints": {
        "height_min": 15,
        "pixels_max": 600000.0,
        "pixels_min": 200000.0,
        "ratio_bounds": [
          0.5,
          2.5
        ],
        "sample": true,
        "shape_mult": 14,
        "width_min": 15
      },
      "shape_mult": 14,
      "test_context": 1.0,
      "translate_p": 0.0
    },
    "crop": "garg",
    "data_root": "datasets",
    "flow": "of",
    "image_shape": [
      518,
      518
    ],
    "keepGT": 0,
    "mini": 1.0,
    "normalization": "imagenet",
    "num_frames": 2,
    "pair": 1,
    "resize_method": "contextcrop",
    "sampling": {},
    "shape_constraints": {
      "height_min": 15,
      "pixels_max": 600000.0,
      "pixels_min": 200000.0,
      "ratio_bounds": [
        0.5,
        2.5
      ],
      "sample": true,
      "shape_mult": 14,
      "width_min": 15
    },
    "train_datasets": [],
    "val_datasets": [
      "ScanNetVid",
      "VKITTI",
      "Bonn",
      "TUM",
      "Sintel"
    ]
  },
  "eps": 1e-06,
  "generic": {
    "deterministic": true,
    "name_page": "velodepth",
    "seed": 42
  },
  "model": {
    "expansion": 4,
    "flow_encoder": {
      "embed_dims": [
        80,
        160
      ],
      "frozen_stages": -1,
      "name": "convnextv2_nano",
      "num_levels": 2,
      "pretrained": "timm"
    },
    "layer_scale": 1.0,
    "name": "VeloDepth",
    "num_heads": 8,
    "pixel_decoder": {
      "depths": [
        2,
        2,
        2
      ],
      "dropout": 0.0,
      "hidden_dim": 512,
      "kernel_size": 3,
      "name": "Decoder",
      "num_fusion_block": 1,
      "num_prompt_blocks": 1,
      "out_dim": 64
    },
    "pixel_encoder": {
      "cls_token_embed_dims": [
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024
      ],
      "depths": [
        6,
        12,
        18,
        24
      ],
      "embed_dim": 1024,
      "embed_dims": [
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024,
        1024
      ],
      "freeze_norm": true,
      "frozen_stages": 0,
      "lr": 3e-06,
      "name": "dinov2_vitl14",
      "num_register_tokens": 0,
      "output_idx": [
        6,
        12,
        18,
        24
      ],
      "patch_size": 14,
      "pretrained": null,
      "stacking_fn": "last",
      "use_norm": true,
      "wd": 0.1
    },
    "residual_encoder": {
      "embed_dim": 96,
      "embed_dims": [
        96,
        192,
        384,
        768
      ],
      "frozen_stages": 0,
      "lr": 0.0001,
      "name": "convnextv2_tiny",
      "num_levels": 1,
      "pretrained": "timm",
      "wd": 0.01
    }
  },
  "training": {
    "f16": "f16",
    "losses": {
      "camera": {
        "name": "Dummy",
        "weight": 1.0
      },
      "depth": {
        "name": "Dummy",
        "weight": 1.0
      },
      "edge": {
        "name": "Dummy",
        "weight": 1.0
      },
      "features": {
        "name": "Dummy",
        "weight": 1.0
      },
      "flow": {
        "name": "Dummy",
        "weight": 1.0
      },
      "self": {
        "name": "Dummy",
        "weight": 1.0
      }
    }
  }
}