File size: 16,424 Bytes
44cf072
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
# Loss terms. Every entry has a `weight`; `lpips` and `deltas` additionally
# set `apply_after_step` (0 and 100 respectively).
loss:
  mse:
    weight: 1.0
  lpips:
    weight: 0.5
    apply_after_step: 0
    perceptual_loss: true
  deltas:
    # NOTE(review): written as integer 1 while the sibling weights are floats
    # (1.0, 0.5) — confirm the consumer does not depend on the type.
    weight: 1
    exclude_by_norm_grad: true
    exclude_by_norm_grad_opposite: true
    eps: 1.0e-08
    apply_after_step: 100
# Dataset selection (`name: dl3dv`), view sampling, and image/pose options.
dataset:
  # View sampler `boundedv2`: 8 context views and 6 target views per sample.
  # The `initial_*` distances sit next to `context_gap_warm_up_steps`;
  # presumably they are the starting values of a warm-up — TODO confirm.
  view_sampler:
    name: boundedv2
    num_target_views: 6
    num_context_views: 8
    min_distance_between_context_views: 24
    max_distance_between_context_views: 45
    max_distance_to_context_views: 0
    context_gap_warm_up_steps: 10000
    target_gap_warm_up_steps: 0
    initial_min_distance_between_context_views: 20
    initial_max_distance_between_context_views: 30
    initial_max_distance_to_context_views: 0
    extra_views_sampling_strategy: farthest_point
    target_views_replace_sample: false
  name: dl3dv
  roots:
  - datasets/dl3dv-480p-chunks
  make_baseline_1: false
  augment: true
  # Two-element shape; presumably (height, width) — TODO confirm.
  image_shape:
  - 256
  - 448
  background_color:
  - 0.0
  - 0.0
  - 0.0
  cameras_are_circular: false
  baseline_epsilon: 0.001
  max_fov: 100.0
  skip_bad_shape: true
  near: 0.01
  far: 200.0
  baseline_scale_bounds: false
  shuffle_val: true
  test_len: -1
  test_chunk_interval: 1
  sort_target_index: true
  sort_context_index: true
  train_times_per_scene: 1
  test_times_per_scene: 1
  # Differs from `image_shape` above (270x480 vs 256x448).
  ori_image_shape:
  - 270
  - 480
  overfit_to_scene: null
  overfit_max_views: 148
  use_index_to_load_chunk: false
  # Mixing with other datasets is switched off in this dump
  # (`mix_tartanair: false` here, `mix_re10k: false` below).
  mix_tartanair: false
  no_mix_test_set: true
  load_depth: false
  center_pose: false
  opencv_pose_format: true
  min_views: 0
  max_views: 0
  highres: false
  mix_re10k: false
  re10k_min_view_dist: 40
  re10k_max_view_dist: 300
  pose_align_first_view: false
  pose_align_middle_view: false
  scale_extrinsics: 1.0
  metric_scale_align_dl3dv: false
  load_remain_context: false
  num_remain_context: 8
  # `random_crop` is false; `min_size`/`max_size` are listed right after it
  # and presumably only matter when it is enabled — TODO confirm.
  random_crop: false
  min_size:
  - 384
  - 512
  max_size:
  - 512
  - 960
# Scene trainer configuration. Children in this file: `scene_initializer`,
# `scene_optimizer`, `decoder`, a few trainer-level scalars, and `initializer`.
scene_trainer:
  # Initializer named `resplat`, producing one Gaussian per pixel
  # (`gaussians_per_pixel: 1`, `num_surfaces: 1`).
  scene_initializer:
    per_pixel: true
    per_view: true
    name: resplat
    num_depth_candidates: 128
    num_surfaces: 1
    gaussians_per_pixel: 1
    gaussian_adapter:
      # NOTE(review): `gaussian_scale_min` (0.5) is larger than
      # `gaussian_scale_max` (0.3). Confirm this is intended, or that the
      # bounds are unused with `softplus_scale: true` — TODO confirm.
      gaussian_scale_min: 0.5
      gaussian_scale_max: 0.3
      sh_degree: 3
      exp_scale: false
      softplus_scale: true
      clamp_min_scale: 1.0e-06
      scale_detach_depth: false
      exp_scale_bias: 4.0
      no_rotate_sh: true
      no_sh_mask: true
      init_rotation_identity: false
    d_feature: 128
    visualizer:
      num_samples: 8
      min_resolution: 256
      export_ply: false
    # Relative path to a pretrained weights file.
    unimatch_weights_path: pretrained/gmdepth-scale1-resumeflowthings-scannet-5d9d7964.pth
    multiview_trans_attn_split: 2
    # Cost-volume and depth U-Net settings; both `*_attn_res` lists are empty.
    costvolume_unet_feat_dim: 128
    costvolume_unet_channel_mult:
    - 1
    - 1
    - 1
    costvolume_unet_attn_res: []
    depth_unet_feat_dim: 64
    depth_unet_attn_res: []
    depth_unet_channel_mult:
    - 1
    - 1
    - 1
    downscale_factor: 4
    shim_patch_size: 4
    local_mv_match: 2
    monodepth_vit_type: vits
    supervise_intermediate_depth: true
    return_depth: true
    num_scales: 1
    upsample_factor: 8
    lowest_feature_resolution: 8
    depth_unet_channels: 128
    grid_sample_disable_cudnn: false
    large_gaussian_head: false
    color_large_unet: false
    init_sh_input_img: true
    feature_upsampler_channels: 64
    gaussian_regressor_channels: 256
    unet_gaussian_regressor: false
    resnet_gaussian_regressor: false
    train_depth_only: false
    # `pt_*` options: `pt_head` is enabled with a single head (`pt_heads: 1`).
    pt_head: true
    pt_heads: 1
    init_pt_with_mv_attn: false
    init_pt_with_mv_attn_lowres: false
    pt_head_channels: null
    pt_head_concat_img: false
    pt_head_conv: false
    multi_scale_pt: false
    attn_proj_channels: 64
    fps_num_samples: null
    knn_samples: 16
    post_norm: false
    no_rpe: true
    no_knn_attn: false
    num_blocks: 4
    pt_downsample: 0
    fps_agg_func: attn
    subsample_method: fps
    add_pt_residual: true
    pt_pred_residual_position: false
    freeze_depth: false
    use_gt_depth: false
    separate_depth_color: false
    separate_depth_type: small
    separate_depth_gaussian_scale: false
    sample_log_depth: true
    bilinear_upsample_depth: false
    no_upsample_depth: false
    return_lowres_depth: false
    foundationstereo: false
    fstereo_num_refine: 1
    lvsm_gaussian_regressor: false
    lvsm_layers: 6
    # `latent_*` options: `latent_gs` is enabled with `latent_downsample: 4`.
    latent_gs: true
    latent_downsample: 4
    fixed_latent_size: true
    latent_gs_img_interp: area
    dpt_head_depth: false
    latent_dpt_upsampler: false
    latent_dpt_upsampler_no_concat: false
    light_dpt_feature: false
    avgpool_depth: false
    nearest_down_depth: false
    predict_scale: false
    norm_by_points: false
    no_pred_depth_range: false
    point_dist_init_gaussian_scale: false
    resizeconv_upsampler: false
    depth_pred_half_res: false
    # Runtime switches: AMP flags are on; FSDP and checkpointing flags are off.
    use_amp: true
    pt_head_amp: true
    use_fsdp: false
    use_checkpointing: false
    init_use_checkpointing: false
    new_gaussian_order: true
    rotate_quat_to_world: false
    refine_rotate_quat_to_world: false
    refine_no_use_covariance: false
    latent_new_reshape: false
    ptv3: false
    ptv3_grid_size: 0.1
    no_pixel_offset: false
    init_gaussian_multiple: 1
    deform_sample_depth: false
    deform_sample_depth_debug: false
  # Scene optimizer named `clogs` (see `name` below), with its refiner,
  # per-parameter-group LR settings, and update-module options.
  scene_optimizer:
    # Refiner `none`: the densify/prune/opacity-reset flags are all false and
    # the refine/reset intervals and start/stop iterations are 999999999.
    refiner:
      name: none
      do_densify: false
      do_prune: false
      do_opacity_reset: false
      cap_max: -1
      noise_lr: 0.0
      pause_refine_after_reset: 0
      refine_every: 999999999
      reset_every: 999999999
      refine_start_iter: 999999999
      refine_stop_iter: 999999999
      refine_scale2d_stop_iter: 0
      grow_grad2d: 0.0
      grow_scale3d: 0.0
      prune_scale3d: 0.0
      prune_scale2d: 0.0
      grow_scale2d: 0.0
      min_opacity: 0.0
      prune_zero_radii: false
      reduce_opacity: false
      reduce_factor: 0.0
      reduce_every: 0
    # Scheduler `none`: every `lr_data` entry is 1 and every
    # `apply_scheduler` entry is false. Both maps use the same group keys
    # (`_base`, `_means`, `_scales`, `_quats`, `_opacities`, `_sh0`, `_shN`).
    lr_scheduler:
      lr_data:
        _base: 1
        _means: 1
        _scales: 1
        _quats: 1
        _opacities: 1
        _sh0: 1
        _shN: 1
      apply_scheduler:
        _base: false
        _means: false
        _scales: false
        _quats: false
        _opacities: false
        _sh0: false
        _shN: false
      name: none
    # All `no_refine_*` flags are false.
    no_refine_mean: false
    no_refine_scale: false
    no_refine_rotation: false
    no_refine_opacity: false
    no_refine_sh0: false
    no_refine_shN: false
    name: clogs
    no_render_error: false
    refine_sh_only: false
    num_basic_refine_blocks: 4
    num_refine_blocks: 1
    # `input_error_*` feature options: only `input_error_resnet_feature` is
    # true in this group (with `input_error_resnet_feature_layers: 18`).
    input_error_lpips_features: false
    input_error_pool_vgg_features: false
    input_error_use_all_vgg_features: false
    input_error_vit_feature: false
    input_error_resnet_feature: true
    # NOTE(review): key is spelled `inpu_` unlike the sibling `input_error_*`
    # keys. It presumably has to match the consuming schema exactly, so do
    # not rename it here without changing the consumer — TODO confirm.
    inpu_error_no_freeze_resnet_feature: false
    input_error_shallow_resnet_feature: false
    input_error_resnet_feature_layers: 18
    input_error_convnext_feature: false
    input_error_convnext_feature_size: small
    input_error_concat_feature: false
    input_error_concat_feature_cosine: false
    input_error_cosine_feature: false
    input_error_add_feature: false
    input_error_concat_rgb_feature_error: false
    concat_init_state: false
    replace_init_state: false
    state_channels: 256
    refine_block_rmsnorm: false
    refine_block_layernorm: false
    pt_qk_norm: false
    norm_pt_block: false
    refine_gaussian_multiple: 1
    refine_residual_init_state: false
    # Clamp values for refined scales and raw opacities.
    clamp_refine_max_scale: 3.0
    clamp_min_scale: 1.0e-06
    clamp_min_raw_opacities: -7
    clamp_max_raw_opacities: 7
    gaussian_head_multiple: 1
    # `update_*` options for the update head and its blocks.
    update_head_concat_img: false
    update_head_layer_num: 2
    update_head_act: gelu
    update_head_final_act: identity
    update_head_scale_mag: false
    update_head_scalar_scale: true
    update_head_scalar_scale_act: relu
    update_head_hidden_dim_matches: "output"
    update_attn_proj_channels: 64
    update_no_knn_attn: false
    update_no_tran_block_norm: false
    update_tran_block_act: gelu
    multi_gaussian_scale_smaller: false
    init_gaussian_multiple: 1
    refine_condition_pt_feature: true
    input_error_no_abs: false
    reinit_gaussian_when_refine_multiple: false
    refine_same_num_points: false
    input_error_no_shuffle: false
    input_error_cache_resnet_feature: false
    input_error_view_pool_resnet_feature: false
    input_error_global_pool_resnet_feature: false
    init_state_wo_features: false
    init_state_type: constant
    init_state_scale: 0
    pt_heads: 1
    refine_with_mv_attn: false
    refine_with_mv_attn_lowres: false
    refine_no_mv_attn: false
    mv_attn_conv_with_norm: false
    refine_mv_shuffle_attn: false
    refine_mv_attn_with_pos_enc: false
    refine_shuffle_attn_no_norm: false
    refine_mv_unimatch_attn: false
    refine_knn_samples: 16
    refine_multi_scale_pt: false
    # Input selection flags: `input_error` and the other `input_*` flags
    # directly below are false; `input_gradient` (further down) is true.
    input_alpha: false
    input_depth: false
    input_depth_smooth_error: false
    input_error: false
    input_zero_gaussian: false
    input_zero_state: false
    input_zero_init_state: false
    input_error_cross_attn: false
    input_error_cross_attn_blocks: 1
    input_error_cross_attn_with_mlp: false
    input_error_radii_averaged: false
    input_error_additional_cross_attn: false
    input_error_num_intermediate_views: 8
    input_error_mv_attn: false
    input_error_mv_attn_blocks: 2
    input_error_mv_attn_lowres: false
    input_error_mv_attn_proj_channels: 0
    input_error_lowres_attn_down_factor: 4
    input_error_lowres_attn_pos_enc: false
    input_error_mv_attn_swin: false
    input_error_num_views: 0
    input_error_remain_context: false
    input_error_merge_remain_context: false
    input_error_warp_remain_context: false
    input_error_random_num_remain_context: false
    input_error_num_remain_context_test: 0
    input_error_warp_input_view: false
    window_local_refine: false
    window_global_refine: false
    window_local_global_refine: false
    update_window_size: 0
    local_gaussian_render: false
    input_error_half_res: false
    # Gradient-input options: `input_gradient` is true, with normalization
    # enabled further down (`input_gradient_normalize_type: adam`).
    input_gradient: true
    input_gradient_log: false
    input_gradient_log_clip_deltas: 0.001
    input_gradient_scale: 1.0
    gradient_update_scale: 1.0
    input_gradient_with_ssim_loss: true
    input_gradient_same_loss: false
    scale_residual_grads: false
    # Bounds 1 and 6; presumably the range of refine steps used during
    # training — TODO confirm.
    train_min_refine: 1
    train_max_refine: 6
    local_global_update: false
    num_global_update: 0
    train_global_update_only: false
    random_update_with_size: false
    ptv3: false
    ptv3_grid_size: 0.1
    # Runtime switches: AMP flags are on; checkpointing flags are off.
    use_amp: true
    pt_head_amp: true
    pt_update_amp: true
    use_checkpointing: false
    recurrent_use_checkpointing: false
    # NOTE(review): a `debug_*` flag is left true in this dump — confirm
    # that is intended for this run.
    debug_refine_update_module: true
    debug_refine_mlp: false
    debug_refine_mlp_layers: 0
    debug_refine_mlp_fixed_init: false
    input_gradient_normalize: true
    input_gradient_normalize_type: adam
    input_normalize_state: false
    input_normalize_gaussians: false
    predict_state_scale: true
    opt_scales_before_act: false
    scale_initial_opacities: 1.0
    sh_d: 16
    local_prune_zero_radii: false
    local_prune_low_weights: false
    local_prune_low_weights_thresh: -1
    update_only_nonzero_grad: true
    # `experimental_*` options: `experimental_run` is false. The per-group
    # maps below use the same group keys as `lr_scheduler.lr_data`.
    experimental_run: false
    experimental_update:
      _base: true
      _means: true
      _scales: true
      _quats: true
      _opacities: true
      _sh0: true
      _shN: true
    experimental_use_grads: false
    experimental_use_norm_grads:
      _base: false
      _means: false
      _scales: false
      _quats: false
      _opacities: false
      _sh0: false
      _shN: false
    experimental_lr:
      _base: 1
      _means: 0.00016
      _scales: 0.005
      _opacities: 0.05
      _quats: 0.001
      _sh0: 0.0025
      _shN: 0.000125
    sample_init_gaussians: 0
    use_time_encoding: false
    time_encoding_max_steps: 2000
  # Decoder named `gsplat`, followed by trainer-level scalars.
  decoder:
    name: gsplat
    use_covariances: false
  use_fsdp: false
  # In this dump `train_scene_init` is false and `train_scene_opt` is true.
  train_scene_init: false
  train_scene_opt: true
  num_update_steps: 4
  # All three `iter_batch_size*` values are -1.
  iter_batch_size: -1
  iter_batch_size_min: -1
  iter_batch_size_max: -1
  # NOTE(review): this mapping repeats several keys that also appear under
  # `scene_trainer.scene_initializer`, with different values (for example
  # `depth_unet_feat_dim` 32 here vs 64 there, `shim_patch_size` 16 vs 4,
  # non-empty `*_attn_res` lists here vs empty lists there). Which of the two
  # the code reads is not visible from this file — TODO confirm.
  initializer:
    num_depth_candidates: 128
    costvolume_unet_feat_dim: 128
    costvolume_unet_channel_mult:
    - 1
    - 1
    - 1
    costvolume_unet_attn_res:
    - 4
    gaussians_per_pixel: 1
    depth_unet_feat_dim: 32
    depth_unet_attn_res:
    - 16
    depth_unet_channel_mult:
    - 1
    - 1
    - 1
    - 1
    - 1
    shim_patch_size: 16
# Test/evaluation settings: post-processing, score computation, and which
# artifacts to save.
test:
  # Post-processing is `name: none`, with `__target__` pointing at
  # `NoPostProcessCfg`; the remaining values are kept alongside it.
  postprocessing:
    steps: 2000
    compute_metrics_every: 100
    lr_data:
      _base: 1
      _means: 1
      _scales: 1
      _opacities: 1
      _quats: 1
      _sh0: 1
      _shN: 1
    scheduler: null
    scheduler_warm_up_ratio: 0.01
    __target__: src.scene_trainer.postprocessing.NoPostProcessCfg
    name: none
  output_path: null
  compute_scores: true
  # Block sequence, matching every other non-empty list in this file.
  compute_scores_metrics:
  - psnr
  - ssim
  - lpips
  eval_time_skip_steps: 0
  eval_initialization: true
  # All `save_*` artifact flags below are false in this dump.
  save_render_image: false
  save_gt_image: false
  save_render_depth: false
  save_gt_depth: false
  save_error_image: false
  save_error_depth: false
  save_video: false
  save_depth: false
  save_depth_npy: false
  save_depth_concat_img: false
  save_gaussian: false
  save_poses: false
  no_align_to_view: false
  save_point_cloud: false
  render_chunk_size: null
  # NOTE(review): spelling `stablize` is kept as-is; the consumer presumably
  # looks this key up by its exact name — TODO confirm before renaming.
  stablize_camera: false
  stab_camera_kernel: 50
  eval_context_views: false
  inference_window_size: null
  profile_model: false
  save_colmap_train_test_views: false
  ori_colmap_data_path: null
  adam_optimizer_step: 0
  save_at_iters: null
  # Two parallel four-element lists; presumably element i of
  # `save_every_freq` applies from step `save_every_steps[i]` — TODO confirm.
  save_every_freq:
  - 1
  - 10
  - 100
  - 500
  save_every_steps:
  - 0
  - 10
  - 100
  - 1000
  skip_if_outputs_exist: false
  dec_chunk_size: 30
# Weights & Biases logging settings.
wandb:
  project: ablation-dl3dv-8views
  # NOTE(review): `placeholder` looks like a stand-in value — confirm the
  # real entity is supplied at launch time.
  entity: placeholder
  name: dl3dv
  mode: online
  id: null
  notes: ''
  tags:
  - dl3dv
  - 270x480
# Top-level run mode; set to `train` in this dump.
mode: train
# Data loader settings per split. Each split has the same four keys; all use
# `batch_size: 1` and each has its own `seed`.
data_loader:
  train:
    num_workers: 10
    persistent_workers: true
    batch_size: 1
    seed: 1234
  test:
    num_workers: 4
    persistent_workers: false
    batch_size: 1
    seed: 2345
  val:
    num_workers: 1
    persistent_workers: true
    batch_size: 1
    seed: 3456
# Optimizer settings for the outer ("meta") training loop.
meta_optimizer:
  lr: 0.0001
  # Both of these learning rates are 0.0 in this dump.
  lr_monodepth: 0.0
  lr_depth: 0.0
  # NOTE(review): both `warm_up_steps` and `warm_up_ratio` are set; which one
  # takes effect is not visible from this file — TODO confirm.
  warm_up_steps: 2000
  weight_decay: 0.01
  warm_up_ratio: 0.01
  adamw_8bit: false
# Checkpoint saving cadence and pretrained-weight loading.
checkpointing:
  load: null
  every_n_train_steps: 1000
  save_top_k: 5
  pretrained_model: checkpoints/optgs/unified-dl3dv-8views/init/checkpoints/epoch_20-step_100000.ckpt
  # Interpolation over `checkpointing.pretrained_model` through a
  # `checkpoint_rel_dir` resolver; presumably a custom resolver registered by
  # the application — TODO confirm.
  pretrained_model_rel_dir: ${checkpoint_rel_dir:${checkpointing.pretrained_model}}
  # All other `pretrained_*` sources are null in this dump.
  pretrained_monodepth: null
  pretrained_mvdepth: null
  pretrained_depth: null
  pretrained_scale_predictor: null
  pretrained_depth_teacher: null
  no_strict_load: true
  resume: false
  no_resume_upsampler: false
  partial_load: false
  freeze_mono_vit: false
  resume_update_module: null
# Training-loop settings: logging/eval cadence, loss weights and toggles, and
# the replay buffer.
train:
  depth_mode: null
  extended_visualization: false
  print_log_every_n_steps: 100
  # `eval_*` options used during training.
  eval_model_every_n_val: 2
  eval_data_length: 999999
  eval_deterministic: false
  eval_time_skip_steps: 3
  eval_save_model: true
  l1_loss: true
  intermediate_loss_weight: 0.9
  no_viz_video: false
  eval_depth: false
  forward_depth_only: false
  train_ignore_large_loss: 0.0
  no_log_projections: true
  no_log_video: true
  # The depth-related and warp loss weights below are all 0.0 in this dump.
  depth_loss_weight: 0.0
  log_depth_loss: true
  depth_smooth_loss_weight: 0.0
  depth_smooth_loss_nonorm: false
  depth_smooth_loss_weight_nvs: 0.0
  monodepth_loss_weight: 0.0
  depth_teacher_loss_weight: 0.0
  viz_depth_teacher: false
  eval_render_depth: false
  render_depth_loss_weight: 0.0
  viz_render_depth: false
  use_gt_depth_range: false
  depth_range_from_disparity: false
  max_disparity: 128.0
  min_disparity: 4.0
  img_warp_loss_weight: 0.0
  warp_loss_start_step: 5000
  # Loss is enabled on both input and target views, with
  # `loss_on_input_views_num: 4`.
  loss_on_input_views: true
  loss_on_target_views: true
  loss_on_input_views_num: 4
  train_window_size: null
  half_res_lpips_loss: false
  viz_depth_separate: false
  # Replay buffer is enabled; its parameters are in `replay_buffer_cfg`.
  use_replay_buffer: true
  replay_buffer_cfg:
    capacity: 20
    sample_batch_size: 1
    sample_prob: 0.7
    insert_prob: 0.99
    return_prob: 0.99
    simulate_ahead: true
    simulate_ahead_min_steps: 1
    simulate_ahead_max_steps: 50
    simulate_ahead_grow: 10000
    max_t: null
    push_only_if_not_full: false
# Top-level seed; the data loaders carry their own per-split seeds under
# `data_loader`.
seed: 111123
# Settings for the outer ("meta") trainer: step budget, validation cadence,
# gradient clipping, and batch limits.
meta_trainer:
  max_steps: 100000
  val_check_interval: 0.25
  gradient_clip_val: 0.5
  num_sanity_val_steps: 2
  eval_index: null
  limit_test_batches: 1.0
  limit_train_batches: 1.0
  num_nodes: 1
# Output directory, serialized as a Python object: the tag applies
# `src.misc.io.CustomPath` to the four path components listed below.
# NOTE(review): `!!python/object/apply` tags are rejected by safe YAML
# loaders and let a non-safe loader call arbitrary Python callables; only load
# this file from a trusted source, and consider dumping the path as a plain
# string instead.
output_dir: !!python/object/apply:src.misc.io.CustomPath
- checkpoints
- rebuttal
- dl3dv-8views
- submitted
# Remaining top-level flags.
use_plugins: false
log_slurm_id: true