SteEsp committed on
Commit
0a5af20
·
verified ·
1 Parent(s): 5338be3

Upload folder using huggingface_hub

Browse files
dense/checkpoints/epoch_5-step_50000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0126299c31a2210a1f3e595e1fe31b33072b141397dec425346a50f95735122
3
+ size 31482949
dense/config.yaml ADDED
@@ -0,0 +1,589 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ loss:
2
+ stability:
3
+ weight: 1.0
4
+ lpips:
5
+ apply_after_step: 0
6
+ weight: 0.5
7
+ perceptual_loss: true
8
+ mse:
9
+ weight: 1.0
10
+ deltas:
11
+ weight: 1
12
+ exclude_by_norm_grad: true
13
+ exclude_by_norm_grad_opposite: true
14
+ eps: 1.0e-08
15
+ apply_after_step: 100
16
+ dataset:
17
+ image_shape:
18
+ - 256
19
+ - 448
20
+ background_color:
21
+ - 0.0
22
+ - 0.0
23
+ - 0.0
24
+ cameras_are_circular: false
25
+ overfit_to_scene: null
26
+ opencv_pose_format: false
27
+ pose_align_middle_view: false
28
+ test_start_idx: 0
29
+ view_sampler:
30
+ name: boundedv2
31
+ num_target_views: 6
32
+ num_context_views: 64
33
+ min_distance_between_context_views: -1
34
+ max_distance_between_context_views: -1
35
+ max_distance_to_context_views: 0
36
+ context_gap_warm_up_steps: 10000
37
+ target_gap_warm_up_steps: 0
38
+ initial_min_distance_between_context_views: -1
39
+ initial_max_distance_between_context_views: -1
40
+ initial_max_distance_to_context_views: 0
41
+ extra_views_sampling_strategy: farthest_point
42
+ target_views_replace_sample: false
43
+ name: dl3dv
44
+ roots:
45
+ - datasets/dl3dv-480p-chunks
46
+ make_baseline_1: false
47
+ augment: true
48
+ baseline_epsilon: 0.001
49
+ max_fov: 100.0
50
+ skip_bad_shape: true
51
+ near: 0.01
52
+ far: 200.0
53
+ baseline_scale_bounds: false
54
+ shuffle_val: true
55
+ test_len: -1
56
+ test_chunk_interval: 1
57
+ sort_target_index: true
58
+ sort_context_index: true
59
+ train_times_per_scene: 1
60
+ test_times_per_scene: 1
61
+ ori_image_shape:
62
+ - 270
63
+ - 480
64
+ overfit_max_views: 148
65
+ use_index_to_load_chunk: false
66
+ mix_tartanair: false
67
+ no_mix_test_set: true
68
+ load_depth: false
69
+ center_pose: false
70
+ pose_align_first_view: false
71
+ scale_extrinsics: 1.0
72
+ metric_scale_align_dl3dv: false
73
+ min_views: 0
74
+ max_views: 0
75
+ highres: false
76
+ mix_re10k: false
77
+ re10k_min_view_dist: 40
78
+ re10k_max_view_dist: 300
79
+ load_remain_context: false
80
+ num_remain_context: 8
81
+ random_crop: false
82
+ min_size:
83
+ - 384
84
+ - 512
85
+ max_size:
86
+ - 512
87
+ - 960
88
+ index_name: index_colmap.json
89
+ scene_trainer:
90
+ scene_initializer:
91
+ per_pixel: false
92
+ per_view: false
93
+ train_min_gaussians_subsample: 0.1
94
+ train_max_gaussians_subsample: 1.0
95
+ eval_min_gaussians_subsample: null
96
+ eval_max_gaussians_subsample: null
97
+ train_fixed_gaussians_num: 70000
98
+ eval_fixed_gaussians_num: 70000
99
+ name: colmap
100
+ path: datasets/dl3dv-colmap-sfm
101
+ normalize_world_space: false
102
+ scaling_factor: 1.0
103
+ init_opacity: 0.1
104
+ sh_degree: 3
105
+ dl3dv_settings: true
106
+ filter_zero_rgb: true
107
+ points3d_subdir: null
108
+ points3d_ply_filename: null
109
+ randomize_opacity: false
110
+ randomize_opacity_distribution: uniform
111
+ randomize_opacity_min: 0.0
112
+ randomize_opacity_std: 0.05
113
+ override_dataset_poses: true
114
+ scene_optimizer:
115
+ refiner:
116
+ name: none
117
+ do_densify: false
118
+ do_prune: false
119
+ do_opacity_reset: false
120
+ cap_max: -1
121
+ noise_lr: 0.0
122
+ pause_refine_after_reset: 0
123
+ refine_every: 999999999
124
+ reset_every: 999999999
125
+ refine_start_iter: 999999999
126
+ refine_stop_iter: 999999999
127
+ refine_scale2d_stop_iter: 0
128
+ grow_grad2d: 0.0
129
+ grow_scale3d: 0.0
130
+ prune_scale3d: 0.0
131
+ prune_scale2d: 0.0
132
+ grow_scale2d: 0.0
133
+ min_opacity: 0.0
134
+ prune_zero_radii: false
135
+ reduce_opacity: false
136
+ reduce_factor: 0.0
137
+ reduce_every: 0
138
+ lr_scheduler:
139
+ lr_data:
140
+ _base: 1
141
+ _means: 1
142
+ _scales: 1
143
+ _quats: 1
144
+ _opacities: 1
145
+ _sh0: 1
146
+ _shN: 1
147
+ apply_scheduler:
148
+ _base: false
149
+ _means: false
150
+ _scales: false
151
+ _quats: false
152
+ _opacities: false
153
+ _sh0: false
154
+ _shN: false
155
+ name: none
156
+ input_gradients_chunk_size: -1
157
+ no_refine_mean: false
158
+ no_refine_scale: false
159
+ no_refine_rotation: false
160
+ no_refine_opacity: false
161
+ no_refine_sh0: false
162
+ no_refine_shN: false
163
+ name: clogs
164
+ no_render_error: false
165
+ refine_sh_only: false
166
+ num_basic_refine_blocks: 4
167
+ num_refine_blocks: 1
168
+ concat_init_state: false
169
+ replace_init_state: false
170
+ state_channels: 256
171
+ refine_block_rmsnorm: false
172
+ refine_block_layernorm: false
173
+ pt_qk_norm: false
174
+ norm_pt_block: false
175
+ refine_gaussian_multiple: 1
176
+ refine_residual_init_state: false
177
+ clamp_refine_max_scale: 3.0
178
+ clamp_min_scale: 1.0e-06
179
+ clamp_min_raw_scales: -8.0
180
+ clamp_max_raw_scales: 2.3
181
+ clamp_min_raw_opacities: -7
182
+ clamp_max_raw_opacities: 7
183
+ gaussian_head_multiple: 1
184
+ clamp_min_sh0: -10000000000.0
185
+ clamp_max_sh0: 10000000000.0
186
+ clamp_min_shs: -2.0
187
+ clamp_max_shs: 2.0
188
+ clamp_shs_soft: false
189
+ update_attn_proj_channels: 64
190
+ update_no_knn_attn: false
191
+ update_no_tran_block_norm: false
192
+ update_tran_block_act: gelu
193
+ multi_gaussian_scale_smaller: false
194
+ init_gaussian_multiple: 1
195
+ refine_condition_pt_feature: true
196
+ input_error_no_abs: false
197
+ reinit_gaussian_when_refine_multiple: false
198
+ refine_same_num_points: false
199
+ input_error_rgb_no_shuffle: false
200
+ input_error_cache_resnet_feature: false
201
+ input_error_view_pool_resnet_feature: false
202
+ input_error_global_pool_resnet_feature: false
203
+ init_state_wo_features: true
204
+ init_state_type: random
205
+ init_state_scale: 1.0
206
+ pt_heads: 1
207
+ refine_with_mv_attn: false
208
+ refine_with_mv_attn_lowres: false
209
+ refine_no_mv_attn: false
210
+ mv_attn_conv_with_norm: false
211
+ refine_mv_shuffle_attn: false
212
+ refine_mv_attn_with_pos_enc: false
213
+ refine_shuffle_attn_no_norm: false
214
+ refine_mv_unimatch_attn: false
215
+ refine_knn_samples: 4
216
+ refine_multi_scale_pt: false
217
+ use_fused_attn: true
218
+ prune_invisible_gaussians: false
219
+ knn_idx_update_every: 1
220
+ input_alpha: false
221
+ input_depth: false
222
+ input_depth_smooth_error: false
223
+ input_error: false
224
+ input_error_add_rgb_feature: false
225
+ input_error_lpips_feature: false
226
+ input_error_pool_vgg_features: false
227
+ input_error_use_all_vgg_features: false
228
+ input_error_vit_feature: false
229
+ input_error_resnet_feature: true
230
+ input_error_no_freeze_resnet_feature: false
231
+ input_error_shallow_resnet_feature: false
232
+ input_error_resnet_feature_layers: 18
233
+ input_error_convnext_feature: false
234
+ input_error_convnext_feature_size: small
235
+ input_error_concat_feature: false
236
+ input_error_concat_feature_cosine: false
237
+ input_error_cosine_feature: false
238
+ input_error_add_feature: false
239
+ input_error_concat_rgb_feature: false
240
+ input_error_cross_attn: false
241
+ input_error_cross_attn_blocks: 1
242
+ input_error_cross_attn_with_mlp: false
243
+ input_error_radii_averaged: false
244
+ input_error_additional_cross_attn: false
245
+ input_error_num_intermediate_views: 8
246
+ input_error_mv_attn: false
247
+ input_error_mv_attn_blocks: 2
248
+ input_error_mv_attn_lowres: false
249
+ input_error_mv_attn_proj_channels: 0
250
+ input_error_lowres_attn_down_factor: 4
251
+ input_error_lowres_attn_pos_enc: false
252
+ input_error_mv_attn_swin: false
253
+ input_error_num_views: 0
254
+ input_error_remain_context: false
255
+ input_error_merge_remain_context: false
256
+ input_error_warp_remain_context: false
257
+ input_error_random_num_remain_context: false
258
+ input_error_num_remain_context_test: 0
259
+ input_error_warp_input_view: false
260
+ input_zero_gaussian: false
261
+ input_zero_state: false
262
+ input_zero_init_state: false
263
+ input_gradient: true
264
+ input_gradient_log: false
265
+ input_gradient_log_clip_deltas: 0.001
266
+ input_gradient_scale: 1.0
267
+ gradient_update_scale: 1.0
268
+ input_gradient_with_ssim_loss: true
269
+ input_gradient_same_loss: false
270
+ input_gradient_loss_reduction: mean_pixels_sum_views
271
+ scale_residual_grads: false
272
+ window_local_refine: false
273
+ window_global_refine: false
274
+ window_local_global_refine: false
275
+ update_window_size: 0
276
+ local_gaussian_render: false
277
+ input_error_half_res: false
278
+ local_global_update: false
279
+ num_global_update: 0
280
+ train_global_update_only: false
281
+ random_update_with_size: false
282
+ ptv3: false
283
+ ptv3_grid_size: 0.1
284
+ use_amp: true
285
+ pt_head_amp: true
286
+ pt_update_amp: true
287
+ use_checkpointing: false
288
+ recurrent_use_checkpointing: false
289
+ debug_refine_update_module: true
290
+ debug_refine_mlp: false
291
+ debug_refine_mlp_layers: 0
292
+ debug_refine_mlp_fixed_init: false
293
+ input_gradient_normalize: true
294
+ input_gradient_normalize_type: adam
295
+ input_normalize_state: false
296
+ input_normalize_gaussians: false
297
+ residual_state: false
298
+ predict_state_scale: true
299
+ predict_state_scale_norm: false
300
+ update_head_concat_img: false
301
+ update_head_layer_num: 2
302
+ update_head_act: gelu
303
+ update_head_final_act: identity
304
+ update_head_hidden_dim_matches: input
305
+ update_head_scale_mag: false
306
+ update_head_scalar_scale: true
307
+ update_head_scalar_scale_act: relu
308
+ update_head_per_param_heads: false
309
+ update_head_per_param_hidden_dim: 48
310
+ update_head_per_param_scales: false
311
+ opt_scales_before_act: true
312
+ scale_initial_opacities: 1.0
313
+ sh_d: 16
314
+ local_prune_zero_radii: false
315
+ local_prune_low_weights: false
316
+ local_prune_low_weights_thresh: -1
317
+ update_only_nonzero_grad: true
318
+ experimental_run: false
319
+ experimental_update:
320
+ _base: true
321
+ _means: true
322
+ _scales: true
323
+ _quats: true
324
+ _opacities: true
325
+ _sh0: true
326
+ _shN: true
327
+ experimental_use_grads: false
328
+ experimental_use_norm_grads:
329
+ _base: false
330
+ _means: false
331
+ _scales: false
332
+ _quats: false
333
+ _opacities: false
334
+ _sh0: false
335
+ _shN: false
336
+ experimental_lr:
337
+ _base: 1
338
+ _means: 0.00016
339
+ _scales: 0.005
340
+ _opacities: 0.05
341
+ _quats: 0.001
342
+ _sh0: 0.0025
343
+ _shN: 0.000125
344
+ sample_init_gaussians: 0
345
+ use_time_encoding: false
346
+ time_encoding_max_steps: 2000
347
+ decoder:
348
+ name: gsplat
349
+ use_covariances: false
350
+ rasterize_mode: classic
351
+ eps2d: 0.0001
352
+ use_fsdp: false
353
+ train_scene_init: false
354
+ train_scene_opt: true
355
+ train_min_refine: 1
356
+ train_max_refine: 6
357
+ num_update_steps: 2000
358
+ iter_batch_size: -1
359
+ opt_batch_size: 8
360
+ opt_batch_size_min: 0
361
+ opt_batch_size_max: 0
362
+ opt_batch_strategy: random
363
+ sh_degree_interval: 0
364
+ test:
365
+ postprocessing:
366
+ steps: 0
367
+ compute_metrics_every: 100
368
+ lr_data:
369
+ _base: 1
370
+ _means: 1
371
+ _scales: 1
372
+ _opacities: 1
373
+ _quats: 1
374
+ _sh0: 1
375
+ _shN: 1
376
+ scheduler: null
377
+ scheduler_warm_up_ratio: 0.01
378
+ prior_steps: 0
379
+ means_lr_final_ratio: 0.0625
380
+ means_lr_delay_mult: 0.01
381
+ means_lr_scale_by_scene_extent: true
382
+ chunk_size: -1
383
+ adc: null
384
+ name: none
385
+ output_path: null
386
+ compute_scores: true
387
+ compute_scores_metrics:
388
+ - psnr
389
+ - ssim
390
+ - lpips
391
+ metrics_batch_size: 32
392
+ eval_time_skip_steps: 0
393
+ eval_initialization: true
394
+ save_render_image: false
395
+ save_render_image_last_only: false
396
+ save_gt_image: false
397
+ save_render_depth: false
398
+ save_gt_depth: false
399
+ save_error_image: false
400
+ save_error_depth: false
401
+ save_video: false
402
+ save_video_fixed_view: false
403
+ save_video_fixed_view_index: 0
404
+ save_video_fixed_view_duplicate: 0
405
+ save_video_fixed_iteration: false
406
+ save_video_fixed_iteration_indices: null
407
+ save_video_fixed_iteration_render_fixed_view: false
408
+ save_video_combined: false
409
+ save_video_combined_iterations: null
410
+ save_video_combined_fixed_iteration_length: 50
411
+ save_depth: false
412
+ save_depth_npy: false
413
+ save_depth_concat_img: false
414
+ save_gaussian: false
415
+ save_poses: false
416
+ save_cameras_json: true
417
+ no_align_to_view: false
418
+ save_point_cloud: false
419
+ render_chunk_size: null
420
+ dec_chunk_size: 30
421
+ stablize_camera: false
422
+ stab_camera_kernel: 50
423
+ eval_context_views: false
424
+ inference_window_size: null
425
+ profile_model: false
426
+ save_colmap_train_test_views: false
427
+ ori_colmap_data_path: null
428
+ adam_optimizer_step: 0
429
+ save_at_iters:
430
+ - 0
431
+ - 1
432
+ - 5
433
+ - 10
434
+ - 50
435
+ - 100
436
+ - 200
437
+ - 300
438
+ - 400
439
+ - 500
440
+ - 1000
441
+ save_every_freq:
442
+ - 1
443
+ - 10
444
+ - 100
445
+ - 500
446
+ save_every_steps:
447
+ - 0
448
+ - 10
449
+ - 100
450
+ - 1000
451
+ skip_if_outputs_exist: false
452
+ scenes_filter: null
453
+ experimental_add_noise_to_images: false
454
+ experimental_add_noise_to_images_std: null
455
+ train:
456
+ replay_buffer_cfg:
457
+ capacity: 20
458
+ sample_batch_size: 1
459
+ sample_prob: 0.7
460
+ insert_prob: 0.7
461
+ return_prob: 0.99
462
+ simulate_ahead: true
463
+ simulate_ahead_min_steps: 1
464
+ simulate_ahead_max_steps: 50
465
+ simulate_ahead_grow: 10000
466
+ max_t: null
467
+ push_only_if_not_full: false
468
+ remove_strategy_when_full: oldest
469
+ depth_mode: null
470
+ extended_visualization: false
471
+ print_log_every_n_steps: 100
472
+ eval_model_every_n_val: 2
473
+ eval_data_length: 5
474
+ eval_deterministic: false
475
+ eval_time_skip_steps: 3
476
+ eval_save_model: true
477
+ l1_loss: true
478
+ intermediate_loss_weight: 0.9
479
+ no_viz_video: false
480
+ eval_depth: false
481
+ forward_depth_only: false
482
+ train_ignore_large_loss: 0.0
483
+ no_log_projections: true
484
+ no_log_video: true
485
+ depth_loss_weight: 0.0
486
+ log_depth_loss: true
487
+ depth_smooth_loss_weight: 0.0
488
+ depth_smooth_loss_nonorm: false
489
+ depth_smooth_loss_weight_nvs: 0.0
490
+ monodepth_loss_weight: 0.0
491
+ depth_teacher_loss_weight: 0.0
492
+ viz_depth_teacher: false
493
+ eval_render_depth: false
494
+ render_depth_loss_weight: 0.0
495
+ viz_render_depth: false
496
+ use_gt_depth_range: false
497
+ depth_range_from_disparity: false
498
+ max_disparity: 128.0
499
+ min_disparity: 4.0
500
+ img_warp_loss_weight: 0.0
501
+ warp_loss_start_step: 5000
502
+ loss_on_input_views: true
503
+ loss_on_target_views: true
504
+ loss_on_input_views_num: 4
505
+ loss_on_target_views_num: 6
506
+ train_window_size: null
507
+ half_res_lpips_loss: false
508
+ viz_depth_separate: false
509
+ scale_l2_loss_weight: 0.0
510
+ sh_l2_loss_weight: 0.0
511
+ opacity_l2_loss_weight: 0.0
512
+ use_replay_buffer: true
513
+ wandb:
514
+ project: eccv
515
+ entity: placeholder
516
+ name: dl3dv
517
+ mode: online
518
+ id: null
519
+ notes: ''
520
+ tags:
521
+ - dl3dv
522
+ - 270x480
523
+ mode: train
524
+ data_loader:
525
+ train:
526
+ num_workers: 10
527
+ persistent_workers: true
528
+ batch_size: 1
529
+ seed: 1234
530
+ test:
531
+ num_workers: 4
532
+ persistent_workers: false
533
+ batch_size: 1
534
+ seed: 2345
535
+ val:
536
+ num_workers: 1
537
+ persistent_workers: true
538
+ batch_size: 1
539
+ seed: 3456
540
+ meta_optimizer:
541
+ lr: 0.0001
542
+ lr_monodepth: 0.0
543
+ lr_depth: 0.0
544
+ warm_up_steps: 2000
545
+ weight_decay: 0.01
546
+ warm_up_ratio: 0.01
547
+ adamw_8bit: false
548
+ checkpointing:
549
+ load: null
550
+ every_n_train_steps: 1000
551
+ save_top_k: 5
552
+ pretrained_model: null
553
+ pretrained_model_rel_dir: ${checkpoint_rel_dir:${checkpointing.pretrained_model}}
554
+ pretrained_monodepth: null
555
+ pretrained_mvdepth: null
556
+ pretrained_depth: null
557
+ pretrained_scale_predictor: null
558
+ pretrained_depth_teacher: null
559
+ no_strict_load: false
560
+ resume: false
561
+ no_resume_upsampler: false
562
+ partial_load: false
563
+ freeze_mono_vit: false
564
+ resume_update_module: null
565
+ pretrained_initializer: null
566
+ pretrained_optimizer: null
567
+ load_existing_cfg: false
568
+ seed: 111123
569
+ meta_trainer:
570
+ max_steps: 50000
571
+ val_check_interval: 0.25
572
+ gradient_clip_val: 0.5
573
+ num_sanity_val_steps: 1
574
+ eval_index: null
575
+ limit_test_batches: 1.0
576
+ limit_train_batches: 1.0
577
+ num_nodes: 1
578
+ output_dir: !!python/object/apply:src.misc.io.CustomPath
579
+ - checkpoints
580
+ - post_eccv
581
+ - sfm_init
582
+ - dl3dv_dense
583
+ - dense_knn_4_stability_or_rerun
584
+ use_plugins: false
585
+ log_slurm_id: true
586
+ version: 1
587
+ profiling:
588
+ mode: none
589
+ debug_cfg: false
init/checkpoints/epoch_20-step_100000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fff9144744c2ae32f5e6171b42a119300031202ac97571ae5b139a973616118
3
+ size 480939566
init/config.yaml ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset:
2
+ view_sampler:
3
+ name: boundedv2
4
+ num_target_views: 6
5
+ num_context_views: 8
6
+ min_distance_between_context_views: 24
7
+ max_distance_between_context_views: 45
8
+ max_distance_to_context_views: 0
9
+ context_gap_warm_up_steps: 10000
10
+ target_gap_warm_up_steps: 0
11
+ initial_min_distance_between_context_views: 20
12
+ initial_max_distance_between_context_views: 30
13
+ initial_max_distance_to_context_views: 0
14
+ extra_views_sampling_strategy: farthest_point
15
+ target_views_replace_sample: false
16
+ name: dl3dv
17
+ roots:
18
+ - datasets/dl3dv-480p-chunks
19
+ make_baseline_1: false
20
+ augment: true
21
+ image_shape:
22
+ - 256
23
+ - 448
24
+ background_color:
25
+ - 0.0
26
+ - 0.0
27
+ - 0.0
28
+ cameras_are_circular: false
29
+ baseline_epsilon: 0.001
30
+ max_fov: 100.0
31
+ skip_bad_shape: true
32
+ near: 0.01
33
+ far: 200.0
34
+ baseline_scale_bounds: false
35
+ shuffle_val: true
36
+ test_len: -1
37
+ test_chunk_interval: 1
38
+ sort_target_index: true
39
+ sort_context_index: true
40
+ train_times_per_scene: 1
41
+ test_times_per_scene: 1
42
+ ori_image_shape:
43
+ - 270
44
+ - 480
45
+ overfit_max_views: 148
46
+ use_index_to_load_chunk: false
47
+ mix_tartanair: false
48
+ no_mix_test_set: true
49
+ load_depth: false
50
+ center_pose: false
51
+ opencv_pose_format: null
52
+ overfit_to_scene: null
53
+ min_views: 0
54
+ max_views: 0
55
+ highres: false
56
+ mix_re10k: false
57
+ re10k_min_view_dist: 40
58
+ re10k_max_view_dist: 300
59
+ pose_align_first_view: false
60
+ scale_extrinsics: 1.0
61
+ metric_scale_align_dl3dv: false
62
+ load_remain_context: false
63
+ num_remain_context: 8
64
+ random_crop: false
65
+ min_size:
66
+ - 384
67
+ - 512
68
+ max_size:
69
+ - 512
70
+ - 960
71
+ scene_trainer:
72
+ scene_initializer:
73
+ name: resplat
74
+ num_depth_candidates: 128
75
+ num_surfaces: 1
76
+ gaussians_per_pixel: 1
77
+ gaussian_adapter:
78
+ gaussian_scale_min: 0.5
79
+ gaussian_scale_max: 0.3
80
+ sh_degree: 3
81
+ exp_scale: false
82
+ softplus_scale: true
83
+ clamp_min_scale: 1.0e-06
84
+ scale_detach_depth: false
85
+ exp_scale_bias: 4.0
86
+ no_rotate_sh: true
87
+ no_sh_mask: true
88
+ init_rotation_identity: false
89
+ d_feature: 128
90
+ visualizer:
91
+ num_samples: 8
92
+ min_resolution: 256
93
+ export_ply: false
94
+ unimatch_weights_path: pretrained/gmdepth-scale1-resumeflowthings-scannet-5d9d7964.pth
95
+ multiview_trans_attn_split: 2
96
+ costvolume_unet_feat_dim: 128
97
+ costvolume_unet_channel_mult:
98
+ - 1
99
+ - 1
100
+ - 1
101
+ costvolume_unet_attn_res: []
102
+ depth_unet_feat_dim: 64
103
+ depth_unet_attn_res: []
104
+ depth_unet_channel_mult:
105
+ - 1
106
+ - 1
107
+ - 1
108
+ downscale_factor: 4
109
+ shim_patch_size: 4
110
+ local_mv_match: 2
111
+ monodepth_vit_type: vits
112
+ supervise_intermediate_depth: true
113
+ return_depth: true
114
+ num_scales: 1
115
+ upsample_factor: 8
116
+ lowest_feature_resolution: 8
117
+ depth_unet_channels: 128
118
+ grid_sample_disable_cudnn: false
119
+ large_gaussian_head: false
120
+ color_large_unet: false
121
+ init_sh_input_img: true
122
+ feature_upsampler_channels: 64
123
+ gaussian_regressor_channels: 256
124
+ unet_gaussian_regressor: false
125
+ resnet_gaussian_regressor: false
126
+ train_depth_only: false
127
+ pt_head: true
128
+ pt_heads: 1
129
+ init_pt_with_mv_attn: false
130
+ init_pt_with_mv_attn_lowres: false
131
+ pt_head_channels: null
132
+ pt_head_concat_img: false
133
+ pt_head_conv: false
134
+ multi_scale_pt: false
135
+ attn_proj_channels: 64
136
+ fps_num_samples: null
137
+ knn_samples: 16
138
+ post_norm: false
139
+ no_rpe: true
140
+ no_knn_attn: false
141
+ num_blocks: 4
142
+ pt_downsample: 0
143
+ fps_agg_func: attn
144
+ subsample_method: fps
145
+ add_pt_residual: true
146
+ pt_pred_residual_position: false
147
+ freeze_depth: false
148
+ use_gt_depth: false
149
+ separate_depth_color: false
150
+ separate_depth_type: small
151
+ separate_depth_gaussian_scale: false
152
+ sample_log_depth: true
153
+ bilinear_upsample_depth: false
154
+ no_upsample_depth: false
155
+ return_lowres_depth: false
156
+ foundationstereo: false
157
+ fstereo_num_refine: 1
158
+ lvsm_gaussian_regressor: false
159
+ lvsm_layers: 6
160
+ latent_gs: true
161
+ latent_downsample: 4
162
+ fixed_latent_size: true
163
+ latent_gs_img_interp: area
164
+ dpt_head_depth: false
165
+ latent_dpt_upsampler: false
166
+ latent_dpt_upsampler_no_concat: false
167
+ light_dpt_feature: false
168
+ avgpool_depth: false
169
+ nearest_down_depth: false
170
+ predict_scale: false
171
+ norm_by_points: false
172
+ no_pred_depth_range: false
173
+ point_dist_init_gaussian_scale: false
174
+ resizeconv_upsampler: false
175
+ depth_pred_half_res: false
176
+ use_amp: true
177
+ pt_head_amp: true
178
+ use_fsdp: false
179
+ use_checkpointing: false
180
+ init_use_checkpointing: false
181
+ new_gaussian_order: true
182
+ rotate_quat_to_world: false
183
+ refine_rotate_quat_to_world: false
184
+ refine_no_use_covariance: false
185
+ latent_new_reshape: false
186
+ ptv3: false
187
+ ptv3_grid_size: 0.1
188
+ no_pixel_offset: false
189
+ init_gaussian_multiple: 1
190
+ deform_sample_depth: false
191
+ deform_sample_depth_debug: false
192
+ scene_optimizer:
193
+ name: depthsplat
194
+ no_refine_rotation: false
195
+ no_refine_mean: false
196
+ no_render_error: false
197
+ refine_sh_only: false
198
+ num_basic_refine_blocks: 4
199
+ num_refine_blocks: 1
200
+ refine_lpips_error: false
201
+ refine_pool_vgg_features: false
202
+ refine_use_all_vgg_features: false
203
+ refine_vit_feature: false
204
+ refine_resnet_feature: false
205
+ no_freeze_resnet_feature: false
206
+ shallow_resnet_feature: false
207
+ resnet_feature_layers: 18
208
+ refine_convnext_feature: false
209
+ convnext_feature_size: small
210
+ refine_concat_feature: false
211
+ refine_concat_feature_cosine: false
212
+ refine_cosine_feature: false
213
+ refine_add_feature: false
214
+ refine_concat_rgb_feature_error: false
215
+ concat_init_state: true
216
+ replace_init_state: false
217
+ state_channels: 0
218
+ refine_block_rmsnorm: false
219
+ refine_block_layernorm: false
220
+ pt_qk_norm: false
221
+ norm_pt_block: false
222
+ refine_gaussian_multiple: 1
223
+ refine_residual_init_state: false
224
+ clamp_refine_max_scale: 3.0
225
+ refine_no_rpe: false
226
+ gaussian_head_multiple: 1
227
+ update_head_concat_img: false
228
+ update_head_act: gelu
229
+ update_attn_proj_channels: null
230
+ update_no_knn_attn: false
231
+ update_no_tran_block_norm: false
232
+ update_tran_block_act: gelu
233
+ multi_gaussian_scale_smaller: false
234
+ init_gaussian_multiple: 1
235
+ no_pixel_offset: false
236
+ refine_condition_pt_feature: false
237
+ render_error_no_abs: false
238
+ reinit_gaussian_when_refine_multiple: false
239
+ refine_same_num_points: false
240
+ render_rgb_error_no_shuffle: false
241
+ refine_cache_resnet_feature: false
242
+ refine_view_pool_resnet_feature: false
243
+ refine_global_pool_resnet_feature: false
244
+ pt_heads: 1
245
+ refine_with_mv_attn: false
246
+ refine_with_mv_attn_lowres: false
247
+ refine_no_mv_attn: false
248
+ mv_attn_conv_with_norm: false
249
+ refine_mv_shuffle_attn: false
250
+ refine_mv_attn_with_pos_enc: false
251
+ refine_shuffle_attn_no_norm: false
252
+ refine_mv_unimatch_attn: false
253
+ refine_knn_samples: 16
254
+ refine_multi_scale_pt: false
255
+ refine_input_alpha: false
256
+ refine_input_depth: false
257
+ refine_input_depth_smooth_error: false
258
+ refine_input_error: true
259
+ refine_input_zero_gaussian: false
260
+ refine_input_zero_state: false
261
+ refine_output_scale_mag: false
262
+ cross_attn_render_error: false
263
+ cross_attn_render_error_blocks: 1
264
+ cross_attn_with_mlp: false
265
+ radii_averaged_render_error: false
266
+ cross_attn_additional_render_error: false
267
+ num_intermediate_views: 8
268
+ render_error_mv_attn: false
269
+ render_error_mv_attn_blocks: 2
270
+ render_error_mv_attn_lowres: false
271
+ mv_attn_proj_channels: 0
272
+ lowres_attn_down_factor: 4
273
+ lowres_attn_pos_enc: false
274
+ render_error_mv_attn_swin: false
275
+ render_error_num_views: 0
276
+ render_error_remain_context: false
277
+ render_error_merge_remain_context: false
278
+ render_error_warp_remain_context: false
279
+ render_error_random_num_remain_context: false
280
+ render_error_num_remain_context_test: 0
281
+ render_error_warp_input_view: false
282
+ window_local_refine: false
283
+ window_global_refine: false
284
+ window_local_global_refine: false
285
+ update_window_size: 0
286
+ local_gaussian_render: false
287
+ render_error_half_res: false
288
+ prune_opacity: 0.0
289
+ prune_radii: 0
290
+ refine_input_gradient: false
291
+ refine_input_gradient_log: false
292
+ refine_input_gradient_log_clip_deltas: 0.001
293
+ refine_input_gradient_scale: 1.0
294
+ gradient_update_scale: 1.0
295
+ refine_gradient_with_ssim_loss: false
296
+ refine_input_gradient_same_loss: false
297
+ train_min_refine: 0
298
+ train_max_refine: 0
299
+ local_global_update: false
300
+ num_global_update: 0
301
+ train_global_update_only: false
302
+ random_update_with_size: false
303
+ ptv3: false
304
+ ptv3_grid_size: 0.1
305
+ use_amp: true
306
+ pt_head_amp: true
307
+ pt_update_amp: true
308
+ use_fsdp: false
309
+ use_checkpointing: false
310
+ init_use_checkpointing: false
311
+ debug_refine_update_module: true
312
+ debug_refine_mlp: false
313
+ debug_refine_mlp_layers: 0
314
+ debug_refine_mlp_fixed_init: false
315
+ decoder:
316
+ name: gsplat
317
+ scale_invariant: false
318
+ initializer:
319
+ num_depth_candidates: 128
320
+ costvolume_unet_feat_dim: 128
321
+ costvolume_unet_channel_mult:
322
+ - 1
323
+ - 1
324
+ - 1
325
+ costvolume_unet_attn_res:
326
+ - 4
327
+ gaussians_per_pixel: 1
328
+ depth_unet_feat_dim: 32
329
+ depth_unet_attn_res:
330
+ - 16
331
+ depth_unet_channel_mult:
332
+ - 1
333
+ - 1
334
+ - 1
335
+ - 1
336
+ - 1
337
+ shim_patch_size: 16
338
+ use_fsdp: false
339
+ train_scene_init: true
340
+ train_scene_opt: false
341
+ num_update_steps: 0
342
+ loss:
343
+ mse:
344
+ weight: 1.0
345
+ lpips:
346
+ weight: 0.5
347
+ apply_after_step: 0
348
+ perceptual_loss: true
349
+ test:
350
+ postprocessing:
351
+ __target__: src.model.postprocessing.PostProcessCfg
352
+ enabled: false
353
+ name: none
354
+ steps: 0
355
+ compute_metrics_every: 0
356
+ lr: 0
357
+ scheduler: null
358
+ scheduler_warm_up_ratio: 0.0
359
+ output_path: null
360
+ compute_scores: true
361
+ eval_time_skip_steps: 0
362
+ save_image: false
363
+ save_video: false
364
+ save_gt_image: false
365
+ save_input_images: false
366
+ save_depth: false
367
+ save_depth_npy: false
368
+ save_depth_concat_img: false
369
+ save_gaussian: false
370
+ no_align_to_view: false
371
+ save_point_cloud: false
372
+ render_chunk_size: null
373
+ stablize_camera: false
374
+ stab_camera_kernel: 50
375
+ render_input_views: false
376
+ inference_window_size: null
377
+ profile_model: false
378
+ save_colmap_train_test_views: false
379
+ ori_colmap_data_path: null
380
+ adam_optimizer_step: 0
381
+ save_every_freq: null
382
+ save_every_steps: null
383
+ dec_chunk_size: 30
384
+ wandb:
385
+ project: unified-dl3dv-8views
386
+ entity: placeholder
387
+ name: dl3dv
388
+ mode: online
389
+ id: null
390
+ notes: null
391
+ tags:
392
+ - dl3dv
393
+ - 270x480
394
+ mode: train
395
+ data_loader:
396
+ train:
397
+ num_workers: 10
398
+ persistent_workers: true
399
+ batch_size: 2
400
+ seed: 1234
401
+ test:
402
+ num_workers: 4
403
+ persistent_workers: false
404
+ batch_size: 1
405
+ seed: 2345
406
+ val:
407
+ num_workers: 1
408
+ persistent_workers: true
409
+ batch_size: 1
410
+ seed: 3456
411
+ meta_optimizer:
412
+ lr: 0.0002
413
+ lr_monodepth: 2.0e-06
414
+ lr_depth: 0.0
415
+ warm_up_steps: 2000
416
+ weight_decay: 0.01
417
+ warm_up_ratio: 0.01
418
+ adamw_8bit: false
419
+ checkpointing:
420
+ load: null
421
+ every_n_train_steps: 1000
422
+ save_top_k: 5
423
+ pretrained_model: null
424
+ pretrained_model_rel_dir: ${checkpoint_rel_dir:${checkpointing.pretrained_model}}
425
+ pretrained_monodepth: null
426
+ pretrained_mvdepth: null
427
+ pretrained_depth: pretrained/depthsplat-depth-small-352x640-samplelogdepth-b0ebc084.pth
428
+ pretrained_scale_predictor: null
429
+ pretrained_depth_teacher: null
430
+ no_strict_load: false
431
+ resume: false
432
+ no_resume_upsampler: false
433
+ partial_load: false
434
+ freeze_mono_vit: false
435
+ resume_update_module: null
436
+ train:
437
+ depth_mode: null
438
+ extended_visualization: false
439
+ print_log_every_n_steps: 100
440
+ eval_model_every_n_val: 2
441
+ eval_data_length: 999999
442
+ eval_deterministic: false
443
+ eval_time_skip_steps: 3
444
+ eval_save_model: true
445
+ l1_loss: true
446
+ intermediate_loss_weight: 0.9
447
+ no_viz_video: false
448
+ eval_depth: false
449
+ forward_depth_only: false
450
+ train_ignore_large_loss: 0.0
451
+ no_log_projections: true
452
+ no_log_video: true
453
+ depth_loss_weight: 0.0
454
+ log_depth_loss: true
455
+ depth_smooth_loss_weight: 0.01
456
+ depth_smooth_loss_nonorm: false
457
+ depth_smooth_loss_weight_nvs: 0.0
458
+ monodepth_loss_weight: 0.0
459
+ depth_teacher_loss_weight: 0.0
460
+ viz_depth_teacher: false
461
+ eval_render_depth: false
462
+ render_depth_loss_weight: 0.0
463
+ viz_render_depth: false
464
+ use_gt_depth_range: false
465
+ depth_range_from_disparity: false
466
+ max_disparity: 128.0
467
+ min_disparity: 4.0
468
+ img_warp_loss_weight: 0.0
469
+ warp_loss_start_step: 5000
470
+ loss_on_input_views: false
471
+ train_window_size: null
472
+ half_res_lpips_loss: false
473
+ viz_depth_separate: false
474
+ seed: 111123
475
+ meta_trainer:
476
+ max_steps: 100000
477
+ val_check_interval: 0.5
478
+ gradient_clip_val: 0.5
479
+ num_sanity_val_steps: 2
480
+ eval_index: null
481
+ limit_test_batches: 1.0
482
+ limit_train_batches: 1.0
483
+ num_nodes: 1
484
+ output_dir: !!python/object/apply:pathlib.PosixPath
485
+ - checkpoints
486
+ - optgs
487
+ - unified-dl3dv-8views
488
+ - init
489
+ use_plugins: false
490
+ log_slurm_id: true