# File size: 12,853 Bytes
# 0a5af20
# Dataset section: dataset `dl3dv`, read from the directories listed in `roots`.
dataset:
  # View sampler `boundedv2`, configured with 8 context views and 6 target views.
  view_sampler:
    name: boundedv2
    num_target_views: 6
    num_context_views: 8
    # Final and `initial_*` context-view distance bounds, plus warm-up lengths.
    # Presumably the bounds move from the initial to the final values over
    # `context_gap_warm_up_steps` -- confirm in the sampler implementation.
    min_distance_between_context_views: 24
    max_distance_between_context_views: 45
    max_distance_to_context_views: 0
    context_gap_warm_up_steps: 10000
    target_gap_warm_up_steps: 0
    initial_min_distance_between_context_views: 20
    initial_max_distance_between_context_views: 30
    initial_max_distance_to_context_views: 0
    extra_views_sampling_strategy: farthest_point
    target_views_replace_sample: false
  name: dl3dv
  roots: ['datasets/dl3dv-480p-chunks']
  make_baseline_1: false
  augment: true
  # Two-element shape; the order (presumably height, width) is not stated here.
  image_shape: [256, 448]
  background_color: [0.0, 0.0, 0.0]
  cameras_are_circular: false
  baseline_epsilon: 0.001
  max_fov: 100.0
  skip_bad_shape: true
  near: 0.01
  far: 200.0
  baseline_scale_bounds: false
  shuffle_val: true
  test_len: -1
  test_chunk_interval: 1
  sort_target_index: true
  sort_context_index: true
  train_times_per_scene: 1
  test_times_per_scene: 1
  ori_image_shape: [270, 480]
  overfit_max_views: 148
  use_index_to_load_chunk: false
  # Dataset mixing is switched off in this configuration.
  mix_tartanair: false
  no_mix_test_set: true
  load_depth: false
  center_pose: false
  opencv_pose_format: null
  overfit_to_scene: null
  min_views: 0
  max_views: 0
  highres: false
  mix_re10k: false
  re10k_min_view_dist: 40
  re10k_max_view_dist: 300
  pose_align_first_view: false
  scale_extrinsics: 1.0
  metric_scale_align_dl3dv: false
  load_remain_context: false
  num_remain_context: 8
  # `random_crop` is disabled; `min_size` / `max_size` are kept alongside it.
  random_crop: false
  min_size: [384, 512]
  max_size: [512, 960]
# Scene trainer section. It holds the scene initializer, the scene optimizer,
# the decoder and a few trainer-level switches.
scene_trainer:
  # Scene initializer named `resplat`.
  scene_initializer:
    name: resplat
    num_depth_candidates: 128
    num_surfaces: 1
    gaussians_per_pixel: 1
    # Gaussian adapter settings: `softplus_scale` is enabled and `exp_scale`
    # is disabled.
    # NOTE(review): `gaussian_scale_min` (0.5) is larger than
    # `gaussian_scale_max` (0.3). Confirm this is intended, e.g. that the two
    # values are not used as a lower/upper pair when `softplus_scale` is true.
    gaussian_adapter:
      gaussian_scale_min: 0.5
      gaussian_scale_max: 0.3
      sh_degree: 3
      exp_scale: false
      softplus_scale: true
      clamp_min_scale: 1.0e-06
      scale_detach_depth: false
      exp_scale_bias: 4.0
      no_rotate_sh: true
      no_sh_mask: true
      init_rotation_identity: false
    d_feature: 128
    visualizer:
      num_samples: 8
      min_resolution: 256
      export_ply: false
    # Relative path to a weights file; what it is resolved against is not
    # visible in this file.
    unimatch_weights_path: pretrained/gmdepth-scale1-resumeflowthings-scannet-5d9d7964.pth
    multiview_trans_attn_split: 2
    # Cost-volume and depth U-Net settings; both `*_attn_res` lists are empty.
    costvolume_unet_feat_dim: 128
    costvolume_unet_channel_mult:
    - 1
    - 1
    - 1
    costvolume_unet_attn_res: []
    depth_unet_feat_dim: 64
    depth_unet_attn_res: []
    depth_unet_channel_mult:
    - 1
    - 1
    - 1
    downscale_factor: 4
    shim_patch_size: 4
    local_mv_match: 2
    monodepth_vit_type: vits
    supervise_intermediate_depth: true
    return_depth: true
    num_scales: 1
    upsample_factor: 8
    lowest_feature_resolution: 8
    depth_unet_channels: 128
    grid_sample_disable_cudnn: false
    large_gaussian_head: false
    color_large_unet: false
    init_sh_input_img: true
    feature_upsampler_channels: 64
    gaussian_regressor_channels: 256
    unet_gaussian_regressor: false
    resnet_gaussian_regressor: false
    train_depth_only: false
    # `pt_*` settings: `pt_head` is enabled with a single head (`pt_heads: 1`).
    pt_head: true
    pt_heads: 1
    init_pt_with_mv_attn: false
    init_pt_with_mv_attn_lowres: false
    pt_head_channels: null
    pt_head_concat_img: false
    pt_head_conv: false
    multi_scale_pt: false
    attn_proj_channels: 64
    fps_num_samples: null
    knn_samples: 16
    post_norm: false
    no_rpe: true
    no_knn_attn: false
    num_blocks: 4
    pt_downsample: 0
    fps_agg_func: attn
    subsample_method: fps
    add_pt_residual: true
    pt_pred_residual_position: false
    # Depth-related switches; `sample_log_depth` is the only one enabled in
    # this group.
    freeze_depth: false
    use_gt_depth: false
    separate_depth_color: false
    separate_depth_type: small
    separate_depth_gaussian_scale: false
    sample_log_depth: true
    bilinear_upsample_depth: false
    no_upsample_depth: false
    return_lowres_depth: false
    foundationstereo: false
    fstereo_num_refine: 1
    lvsm_gaussian_regressor: false
    lvsm_layers: 6
    # `latent_*` settings: `latent_gs` is enabled with `latent_downsample: 4`.
    latent_gs: true
    latent_downsample: 4
    fixed_latent_size: true
    latent_gs_img_interp: area
    dpt_head_depth: false
    latent_dpt_upsampler: false
    latent_dpt_upsampler_no_concat: false
    light_dpt_feature: false
    avgpool_depth: false
    nearest_down_depth: false
    predict_scale: false
    norm_by_points: false
    no_pred_depth_range: false
    point_dist_init_gaussian_scale: false
    resizeconv_upsampler: false
    depth_pred_half_res: false
    # Runtime switches: `use_amp` / `pt_head_amp` are on (presumably automatic
    # mixed precision -- confirm); FSDP and checkpointing flags are off.
    use_amp: true
    pt_head_amp: true
    use_fsdp: false
    use_checkpointing: false
    init_use_checkpointing: false
    new_gaussian_order: true
    rotate_quat_to_world: false
    refine_rotate_quat_to_world: false
    refine_no_use_covariance: false
    latent_new_reshape: false
    ptv3: false
    ptv3_grid_size: 0.1
    no_pixel_offset: false
    init_gaussian_multiple: 1
    deform_sample_depth: false
    deform_sample_depth_debug: false
  # Scene optimizer named `depthsplat`.
  # In this file `scene_trainer.train_scene_opt` is false and
  # `scene_trainer.num_update_steps` is 0; whether that leaves this section
  # unused at run time is not visible here -- confirm against the trainer.
  scene_optimizer:
    name: depthsplat
    no_refine_rotation: false
    no_refine_mean: false
    no_render_error: false
    refine_sh_only: false
    num_basic_refine_blocks: 4
    num_refine_blocks: 1
    # Feature-source switches for refinement; all are disabled here.
    refine_lpips_error: false
    refine_pool_vgg_features: false
    refine_use_all_vgg_features: false
    refine_vit_feature: false
    refine_resnet_feature: false
    no_freeze_resnet_feature: false
    shallow_resnet_feature: false
    resnet_feature_layers: 18
    refine_convnext_feature: false
    convnext_feature_size: small
    refine_concat_feature: false
    refine_concat_feature_cosine: false
    refine_cosine_feature: false
    refine_add_feature: false
    refine_concat_rgb_feature_error: false
    # State handling: `concat_init_state` is enabled, `replace_init_state` is not.
    concat_init_state: true
    replace_init_state: false
    state_channels: 0
    refine_block_rmsnorm: false
    refine_block_layernorm: false
    pt_qk_norm: false
    norm_pt_block: false
    refine_gaussian_multiple: 1
    refine_residual_init_state: false
    clamp_refine_max_scale: 3.0
    refine_no_rpe: false
    gaussian_head_multiple: 1
    # Update-head settings; both activation keys are set to `gelu`.
    update_head_concat_img: false
    update_head_act: gelu
    update_attn_proj_channels: null
    update_no_knn_attn: false
    update_no_tran_block_norm: false
    update_tran_block_act: gelu
    multi_gaussian_scale_smaller: false
    init_gaussian_multiple: 1
    no_pixel_offset: false
    refine_condition_pt_feature: false
    render_error_no_abs: false
    reinit_gaussian_when_refine_multiple: false
    refine_same_num_points: false
    render_rgb_error_no_shuffle: false
    refine_cache_resnet_feature: false
    refine_view_pool_resnet_feature: false
    refine_global_pool_resnet_feature: false
    pt_heads: 1
    # Multi-view attention switches for refinement; all are disabled here.
    refine_with_mv_attn: false
    refine_with_mv_attn_lowres: false
    refine_no_mv_attn: false
    mv_attn_conv_with_norm: false
    refine_mv_shuffle_attn: false
    refine_mv_attn_with_pos_enc: false
    refine_shuffle_attn_no_norm: false
    refine_mv_unimatch_attn: false
    refine_knn_samples: 16
    refine_multi_scale_pt: false
    # Refinement inputs: only `refine_input_error` is enabled in this group.
    refine_input_alpha: false
    refine_input_depth: false
    refine_input_depth_smooth_error: false
    refine_input_error: true
    refine_input_zero_gaussian: false
    refine_input_zero_state: false
    refine_output_scale_mag: false
    cross_attn_render_error: false
    cross_attn_render_error_blocks: 1
    cross_attn_with_mlp: false
    radii_averaged_render_error: false
    cross_attn_additional_render_error: false
    num_intermediate_views: 8
    # `render_error_*` settings; the boolean switches in this group are all off.
    render_error_mv_attn: false
    render_error_mv_attn_blocks: 2
    render_error_mv_attn_lowres: false
    mv_attn_proj_channels: 0
    lowres_attn_down_factor: 4
    lowres_attn_pos_enc: false
    render_error_mv_attn_swin: false
    render_error_num_views: 0
    render_error_remain_context: false
    render_error_merge_remain_context: false
    render_error_warp_remain_context: false
    render_error_random_num_remain_context: false
    render_error_num_remain_context_test: 0
    render_error_warp_input_view: false
    # Windowed / local refinement options; all disabled, window size 0.
    window_local_refine: false
    window_global_refine: false
    window_local_global_refine: false
    update_window_size: 0
    local_gaussian_render: false
    render_error_half_res: false
    prune_opacity: 0.0
    prune_radii: 0
    # Gradient-input options; `refine_input_gradient` is disabled.
    refine_input_gradient: false
    refine_input_gradient_log: false
    refine_input_gradient_log_clip_deltas: 0.001
    refine_input_gradient_scale: 1.0
    gradient_update_scale: 1.0
    refine_gradient_with_ssim_loss: false
    refine_input_gradient_same_loss: false
    train_min_refine: 0
    train_max_refine: 0
    local_global_update: false
    num_global_update: 0
    train_global_update_only: false
    random_update_with_size: false
    ptv3: false
    ptv3_grid_size: 0.1
    # Runtime switches: the three `*amp` flags are on; FSDP and checkpointing
    # flags are off.
    use_amp: true
    pt_head_amp: true
    pt_update_amp: true
    use_fsdp: false
    use_checkpointing: false
    init_use_checkpointing: false
    # NOTE(review): a `debug_*` flag is set to true in this configuration;
    # confirm that is intended.
    debug_refine_update_module: true
    debug_refine_mlp: false
    debug_refine_mlp_layers: 0
    debug_refine_mlp_fixed_init: false
  # Decoder `gsplat`, with `scale_invariant` turned off.
  decoder:
    name: gsplat
    scale_invariant: false
  # NOTE(review): this mapping repeats keys that also appear under
  # `scene_trainer.scene_initializer`, with different values for some of them
  # (`depth_unet_feat_dim` 32 vs 64, `shim_patch_size` 16 vs 4, non-empty
  # `*_attn_res` lists, a five-entry `depth_unet_channel_mult`). Which of the
  # two sections is actually consumed is not visible here -- confirm.
  initializer:
    num_depth_candidates: 128
    costvolume_unet_feat_dim: 128
    costvolume_unet_channel_mult: [1, 1, 1]
    costvolume_unet_attn_res: [4]
    gaussians_per_pixel: 1
    depth_unet_feat_dim: 32
    depth_unet_attn_res: [16]
    depth_unet_channel_mult: [1, 1, 1, 1, 1]
    shim_patch_size: 16
  # Trainer-level switches: scene-init training is on, scene-opt training is
  # off, and the number of update steps is 0.
  use_fsdp: false
  train_scene_init: true
  train_scene_opt: false
  num_update_steps: 0
# Loss section with two entries: `mse` (weight 1.0) and `lpips` (weight 0.5).
loss:
  mse:
    weight: 1.0
  lpips:
    weight: 0.5
    # 0 here; presumably the step from which the term is applied -- confirm.
    apply_after_step: 0
    perceptual_loss: true
# Test-time settings. In this configuration only `compute_scores` is enabled;
# every `save_*` output switch is off.
test:
  # Post-processing is disabled (`enabled: false`, `name: none`, `steps: 0`).
  # `__target__` names `src.model.postprocessing.PostProcessCfg`; presumably the
  # config class this mapping is converted into -- confirm against the loader.
  postprocessing:
    __target__: src.model.postprocessing.PostProcessCfg
    enabled: false
    name: none
    steps: 0
    compute_metrics_every: 0
    lr: 0
    scheduler: null
    scheduler_warm_up_ratio: 0.0
  output_path: null
  compute_scores: true
  eval_time_skip_steps: 0
  save_image: false
  save_video: false
  save_gt_image: false
  save_input_images: false
  save_depth: false
  save_depth_npy: false
  save_depth_concat_img: false
  save_gaussian: false
  no_align_to_view: false
  save_point_cloud: false
  render_chunk_size: null
  # NOTE(review): the key is spelled `stablize_camera` (not "stabilize"). It is
  # left untouched because the consumer presumably looks it up by this exact
  # name -- confirm before renaming.
  stablize_camera: false
  stab_camera_kernel: 50
  render_input_views: false
  inference_window_size: null
  profile_model: false
  save_colmap_train_test_views: false
  ori_colmap_data_path: null
  adam_optimizer_step: 0
  save_every_freq: null
  save_every_steps: null
  dec_chunk_size: 30
# Weights & Biases logging settings (project, run name, mode and tags).
wandb:
  project: unified-dl3dv-8views
  # NOTE(review): `entity` is the literal string `placeholder` while `mode` is
  # `online`; presumably it has to be overridden before a real run -- confirm.
  entity: placeholder
  name: dl3dv
  mode: online
  id: null
  notes: null
  # The second tag is quoted so it is always read as a string.
  tags: [dl3dv, '270x480']
# Run mode selected for this configuration.
mode: train
# Data loader settings per split. Each split sets its own worker count,
# `persistent_workers` flag, batch size and seed.
data_loader:
  train:
    num_workers: 10
    persistent_workers: true
    batch_size: 2
    seed: 1234
  test:
    num_workers: 4
    persistent_workers: false
    batch_size: 1
    seed: 2345
  val:
    num_workers: 1
    persistent_workers: true
    batch_size: 1
    seed: 3456
# Optimizer settings: a base `lr` plus separate `lr_monodepth` and `lr_depth`
# values (`lr_depth` is 0.0 in this configuration).
meta_optimizer:
  lr: 0.0002
  lr_monodepth: 2.0e-06
  lr_depth: 0.0
  # NOTE(review): both `warm_up_steps` (2000) and `warm_up_ratio` (0.01) are
  # set. With `meta_trainer.max_steps: 100000` a 0.01 ratio would correspond to
  # 1000 steps, so the two disagree if they describe the same warm-up; which
  # one takes effect is not visible here -- confirm.
  warm_up_steps: 2000
  weight_decay: 0.01
  warm_up_ratio: 0.01
  adamw_8bit: false
# Checkpoint saving / loading settings. Only `pretrained_depth` points at a
# weights file; the other `pretrained_*` entries and `load` are null.
checkpointing:
  load: null
  every_n_train_steps: 1000
  save_top_k: 5
  pretrained_model: null
  # Interpolation that passes `checkpointing.pretrained_model` to a resolver
  # named `checkpoint_rel_dir`. Quoted so the `${...}` text is always read as
  # one string.
  pretrained_model_rel_dir: '${checkpoint_rel_dir:${checkpointing.pretrained_model}}'
  pretrained_monodepth: null
  pretrained_mvdepth: null
  pretrained_depth: 'pretrained/depthsplat-depth-small-352x640-samplelogdepth-b0ebc084.pth'
  pretrained_scale_predictor: null
  pretrained_depth_teacher: null
  # Loading / resume switches; all are off in this configuration.
  no_strict_load: false
  resume: false
  no_resume_upsampler: false
  partial_load: false
  freeze_mono_vit: false
  resume_update_module: null
# Training-loop settings: logging and evaluation cadence, loss weights and
# visualization switches.
train:
  depth_mode: null
  extended_visualization: false
  print_log_every_n_steps: 100
  eval_model_every_n_val: 2
  eval_data_length: 999999
  eval_deterministic: false
  eval_time_skip_steps: 3
  eval_save_model: true
  # NOTE(review): `l1_loss` is true while the top-level `loss` section lists an
  # `mse` entry; how the two interact is not visible here -- confirm.
  l1_loss: true
  intermediate_loss_weight: 0.9
  no_viz_video: false
  eval_depth: false
  forward_depth_only: false
  train_ignore_large_loss: 0.0
  no_log_projections: true
  no_log_video: true
  # Loss weights in this group: only `depth_smooth_loss_weight` (0.01) is
  # non-zero.
  depth_loss_weight: 0.0
  log_depth_loss: true
  depth_smooth_loss_weight: 0.01
  depth_smooth_loss_nonorm: false
  depth_smooth_loss_weight_nvs: 0.0
  monodepth_loss_weight: 0.0
  depth_teacher_loss_weight: 0.0
  viz_depth_teacher: false
  eval_render_depth: false
  render_depth_loss_weight: 0.0
  viz_render_depth: false
  use_gt_depth_range: false
  depth_range_from_disparity: false
  max_disparity: 128.0
  min_disparity: 4.0
  # The warp loss weight is 0.0, although a start step is still configured.
  img_warp_loss_weight: 0.0
  warp_loss_start_step: 5000
  loss_on_input_views: false
  train_window_size: null
  half_res_lpips_loss: false
  viz_depth_separate: false
# Top-level seed (the data loaders above carry their own per-split seeds).
seed: 111123
# Trainer settings: step budget, validation interval, gradient clipping and
# batch limits.
meta_trainer:
  max_steps: 100000
  # A float value; presumably a fraction of an epoch rather than a step
  # count -- confirm against the trainer in use.
  val_check_interval: 0.5
  gradient_clip_val: 0.5
  num_sanity_val_steps: 2
  eval_index: null
  limit_test_batches: 1.0
  limit_train_batches: 1.0
  num_nodes: 1
# Output directory, stored as a plain string so that safe YAML loaders can read
# this file. The value was previously serialized with the Python-specific
# `!!python/object/apply:pathlib.PosixPath` tag and the four path segments
# `checkpoints`, `optgs`, `unified-dl3dv-8views`, `init`. Safe loaders reject
# that tag, and unsafe loaders call a Python constructor while parsing it.
# Consumers that need a path object should wrap this string themselves.
output_dir: checkpoints/optgs/unified-dl3dv-8views/init
# Remaining top-level switches: plugins are off, SLURM id logging is on.
use_plugins: false
log_slurm_id: true