| { | |
| "data": { | |
| "augmentations": { | |
| "affine_p": 0.0, | |
| "blur_p": 0.1, | |
| "cut_p": 0.0, | |
| "flip_p": 0.5, | |
| "flipt_p": 0.3, | |
| "gamma_p": 0.5, | |
| "grayscale_p": 0.1, | |
| "invert_p": 0.0, | |
| "jitter_p": 0.5, | |
| "noise_pad": 1.0, | |
| "only_zoom": true, | |
| "random_blur": 2.0, | |
| "random_gamma": 0.2, | |
| "random_jitter": 0.1, | |
| "random_rotation": 0.0, | |
| "random_scale": 2.0, | |
| "random_shear": 0.0, | |
| "random_translate_x": 0.04, | |
| "random_translate_y": 0.01, | |
| "rotation_p": 0.0, | |
| "scale_p": 0.0, | |
| "shape_constraints": { | |
| "height_min": 15, | |
| "pixels_max": 600000.0, | |
| "pixels_min": 200000.0, | |
| "ratio_bounds": [ | |
| 0.5, | |
| 2.5 | |
| ], | |
| "sample": true, | |
| "shape_mult": 14, | |
| "width_min": 15 | |
| }, | |
| "shape_mult": 14, | |
| "test_context": 1.0, | |
| "translate_p": 0.0 | |
| }, | |
| "crop": "garg", | |
| "data_root": "datasets", | |
| "flow": "of", | |
| "image_shape": [ | |
| 518, | |
| 518 | |
| ], | |
| "keepGT": 0, | |
| "mini": 1.0, | |
| "normalization": "imagenet", | |
| "num_frames": 2, | |
| "pair": 1, | |
| "resize_method": "contextcrop", | |
| "sampling": {}, | |
| "shape_constraints": { | |
| "height_min": 15, | |
| "pixels_max": 600000.0, | |
| "pixels_min": 200000.0, | |
| "ratio_bounds": [ | |
| 0.5, | |
| 2.5 | |
| ], | |
| "sample": true, | |
| "shape_mult": 14, | |
| "width_min": 15 | |
| }, | |
| "train_datasets": [], | |
| "val_datasets": [ | |
| "ScanNetVid", | |
| "VKITTI", | |
| "Bonn", | |
| "TUM", | |
| "Sintel" | |
| ] | |
| }, | |
| "eps": 1e-06, | |
| "generic": { | |
| "deterministic": true, | |
| "name_page": "velodepth", | |
| "seed": 42 | |
| }, | |
| "model": { | |
| "expansion": 4, | |
| "flow_encoder": { | |
| "embed_dims": [ | |
| 80, | |
| 160 | |
| ], | |
| "frozen_stages": -1, | |
| "name": "convnextv2_nano", | |
| "num_levels": 2, | |
| "pretrained": "timm" | |
| }, | |
| "layer_scale": 1.0, | |
| "name": "VeloDepth", | |
| "num_heads": 8, | |
| "pixel_decoder": { | |
| "depths": [ | |
| 2, | |
| 2, | |
| 2 | |
| ], | |
| "dropout": 0.0, | |
| "hidden_dim": 512, | |
| "kernel_size": 3, | |
| "name": "Decoder", | |
| "num_fusion_block": 1, | |
| "num_prompt_blocks": 1, | |
| "out_dim": 64 | |
| }, | |
| "pixel_encoder": { | |
| "cls_token_embed_dims": [ | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024 | |
| ], | |
| "depths": [ | |
| 6, | |
| 12, | |
| 18, | |
| 24 | |
| ], | |
| "embed_dim": 1024, | |
| "embed_dims": [ | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024 | |
| ], | |
| "freeze_norm": true, | |
| "frozen_stages": 0, | |
| "lr": 3e-06, | |
| "name": "dinov2_vitl14", | |
| "num_register_tokens": 0, | |
| "output_idx": [ | |
| 6, | |
| 12, | |
| 18, | |
| 24 | |
| ], | |
| "patch_size": 14, | |
| "pretrained": null, | |
| "stacking_fn": "last", | |
| "use_norm": true, | |
| "wd": 0.1 | |
| }, | |
| "residual_encoder": { | |
| "embed_dim": 96, | |
| "embed_dims": [ | |
| 96, | |
| 192, | |
| 384, | |
| 768 | |
| ], | |
| "frozen_stages": 0, | |
| "lr": 0.0001, | |
| "name": "convnextv2_tiny", | |
| "num_levels": 1, | |
| "pretrained": "timm", | |
| "wd": 0.01 | |
| } | |
| }, | |
| "training": { | |
| "f16": "f16", | |
| "losses": { | |
| "camera": { | |
| "name": "Dummy", | |
| "weight": 1.0 | |
| }, | |
| "depth": { | |
| "name": "Dummy", | |
| "weight": 1.0 | |
| }, | |
| "edge": { | |
| "name": "Dummy", | |
| "weight": 1.0 | |
| }, | |
| "features": { | |
| "name": "Dummy", | |
| "weight": 1.0 | |
| }, | |
| "flow": { | |
| "name": "Dummy", | |
| "weight": 1.0 | |
| }, | |
| "self": { | |
| "name": "Dummy", | |
| "weight": 1.0 | |
| } | |
| } | |
| } | |
| } |