# Ordered list of input names for the model.
# NOTE(review): presumably these are the batch keys gathered and passed to
# the model's forward call; the consumer is not visible in this file, so
# confirm against the training/inference script before renaming any entry.
collect_input_args = [
    'image_lr',
    'crops_image_hr',
    'depth_gt',
    'crop_depths',
    'bboxs',
    'image_hr',
]
# Runtime switches and environment settings.
# NOTE(review): the code that reads these values is outside this file; the
# remarks below follow the key names only and should be confirmed.
convert_syncbn = True  # presumably: convert BatchNorm layers to SyncBatchNorm
debug = False
env_cfg = dict(
    cudnn_benchmark=True,
    # Distributed backend name and multiprocessing start method.
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='forkserver'))
# presumably forwarded to the distributed data-parallel wrapper — TODO confirm
find_unused_parameters = True
# Loader settings for a generic image dataset ('ImageDataset').
# The dataset name and RGB directory are empty strings and gt_dir is None
# here; presumably they are filled in at run time — TODO confirm with the
# script that consumes this entry.
general_dataloader = dict(
    batch_size=1,
    dataset=dict(
        dataset_name='', gt_dir=None, rgb_image_dir='', type='ImageDataset'),
    num_workers=2)
# Launcher name and log name for this run.
launcher = 'pytorch'
log_name = 'patchfusion'
# Top-level depth range. The same 80 / 0.001 pair is repeated literally
# inside the model and dataset entries of this file; keep them in sync when
# editing. Units are not stated in this file.
max_depth = 80
min_depth = 0.001
# Model definition: a 'PatchFusion' wrapper holding two 'DA-ZoeDepth'
# branches (coarse and fine), a 'GuidedFusionPatchFusion' module and an
# 'SILogLoss' entry.
#
# coarse_branch and fine_branch carry identical settings in this file.
# NOTE(review): both branches say dataset='nyu' while the dataloaders in
# this file point at ./data/u4k — whether the branch reads that key is not
# visible here; confirm before relying on it.
model = dict(
    coarse_branch=dict(
        attractor_alpha=1000,
        attractor_gamma=2,
        attractor_kind='mean',
        attractor_type='inv',
        aug=True,
        bin_centers_type='softplus',
        bin_embedding_dim=128,
        clip_grad=0.1,
        dataset='nyu',
        depth_anything=True,
        distributed=True,
        do_resize=False,
        force_keep_ar=True,
        freeze_midas_bn=True,
        gpu='NULL',
        img_size=[392, 518],
        inverse_midas=False,
        log_images_every=0.1,
        max_depth=80,
        max_temp=50.0,
        max_translation=100,
        memory_efficient=True,
        midas_model_type='vitb',
        min_depth=0.001,
        min_temp=0.0212,
        model='zoedepth',
        n_attractors=[16, 8, 4, 1],
        n_bins=64,
        name='ZoeDepth',
        notes='',
        output_distribution='logbinomial',
        prefetch=False,
        pretrained_resource='local::./work_dir/DepthAnything_vitb.pt',
        print_losses=False,
        project='ZoeDepth',
        random_crop=False,
        random_translate=False,
        root='.',
        save_dir='',
        shared_dict='NULL',
        tags='',
        train_midas=True,
        translate_prob=0.2,
        type='DA-ZoeDepth',
        uid='NULL',
        use_amp=False,
        use_pretrained_midas=True,
        use_shared_dict=False,
        validate_every=0.25,
        version_name='v1',
        workers=16),
    fine_branch=dict(
        attractor_alpha=1000,
        attractor_gamma=2,
        attractor_kind='mean',
        attractor_type='inv',
        aug=True,
        bin_centers_type='softplus',
        bin_embedding_dim=128,
        clip_grad=0.1,
        dataset='nyu',
        depth_anything=True,
        distributed=True,
        do_resize=False,
        force_keep_ar=True,
        freeze_midas_bn=True,
        gpu='NULL',
        img_size=[392, 518],
        inverse_midas=False,
        log_images_every=0.1,
        max_depth=80,
        max_temp=50.0,
        max_translation=100,
        memory_efficient=True,
        midas_model_type='vitb',
        min_depth=0.001,
        min_temp=0.0212,
        model='zoedepth',
        n_attractors=[16, 8, 4, 1],
        n_bins=64,
        name='ZoeDepth',
        notes='',
        output_distribution='logbinomial',
        prefetch=False,
        pretrained_resource='local::./work_dir/DepthAnything_vitb.pt',
        print_losses=False,
        project='ZoeDepth',
        random_crop=False,
        random_translate=False,
        root='.',
        save_dir='',
        shared_dict='NULL',
        tags='',
        train_midas=True,
        translate_prob=0.2,
        type='DA-ZoeDepth',
        uid='NULL',
        use_amp=False,
        use_pretrained_midas=True,
        use_shared_dict=False,
        validate_every=0.25,
        version_name='v1',
        workers=16),
    guided_fusion=dict(
        g2l=True,
        in_channels=[32, 128, 128, 128, 128, 128],
        n_channels=5,
        # Six entries, matching the six in_channels entries above; the first
        # equals 392 * 518, the area of patch_process_shape.
        num_patches=[203056, 66304, 16576, 4144, 1036, 266],
        patch_process_shape=(392, 518),
        type='GuidedFusionPatchFusion'),
    max_depth=80,
    min_depth=0.001,
    # Same (392, 518) pair as img_size in both branches.
    patch_process_shape=(392, 518),
    # Two checkpoint paths; going by the directory names the first is the
    # coarse pretrain and the second the fine pretrain.
    pretrain_model=[
        './work_dir/depthanything_vitb_u4k/coarse_pretrain/checkpoint_24.pth',
        './work_dir/depthanything_vitb_u4k/fine_pretrain/checkpoint_24.pth',
    ],
    sigloss=dict(type='SILogLoss'),
    type='PatchFusion')
# Optimizer settings: AdamW with lr 1e-4 and weight decay 1e-3, plus
# gradient clipping configured as type 'norm' with max_norm 0.1 and
# norm_type 2.
# custom_keys is empty, so no per-parameter overrides are declared here.
optim_wrapper = dict(
    clip_grad=dict(max_norm=0.1, norm_type=2, type='norm'),
    optimizer=dict(lr=0.0001, type='AdamW', weight_decay=0.001),
    paramwise_cfg=dict(bypass_duplicate=True, custom_keys=dict()))
# Learning-rate schedule parameters.
# NOTE(review): no 'type' key is given. The key names match keyword
# arguments of torch.optim.lr_scheduler.OneCycleLR, so they are presumably
# forwarded to it — confirm against the training script.
param_scheduler = dict(
    base_momentum=0.85,
    cycle_momentum=True,
    div_factor=10,
    final_div_factor=10000,
    max_momentum=0.95,
    pct_start=0.25,
    three_phase=False)
# Run metadata: project name, resume flag and descriptive tags.
# NOTE(review): presumably used for experiment logging — the consumer is not
# visible in this file.
project = 'patchfusion'
resume = False
tags = [
    'patchfusion',
    'da',
    'vitb',
]
# Loader over the split listed in ./data/u4k/splits/test.txt, in 'infer'
# mode.
# NOTE(review): network_process_size is [384, 512] and no resize_mode is
# set, whereas train_dataloader / val_dataloader in this file use
# [392, 518] with resize_mode='depth-anything'. Confirm this difference is
# intended before using this loader with the model configured here.
test_in_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        split='./data/u4k/splits/test.txt',
        transform_cfg=dict(network_process_size=[384, 512]),
        type='UnrealStereo4kDataset'),
    num_workers=2)
# Loader over the split listed in ./data/u4k/splits/test_out.txt, in
# 'infer' mode.
# NOTE(review): network_process_size is [384, 512] and no resize_mode is
# set, whereas train_dataloader / val_dataloader in this file use
# [392, 518] with resize_mode='depth-anything'. Confirm this difference is
# intended before using this loader with the model configured here.
test_out_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        split='./data/u4k/splits/test_out.txt',
        transform_cfg=dict(network_process_size=[384, 512]),
        type='UnrealStereo4kDataset'),
    num_workers=2)
# Training-loop schedule: 16 epochs, with val_type 'epoch_base'.
# NOTE(review): interval units are not stated here. Given val_type,
# val_interval and save_checkpoint_interval are presumably counted in
# epochs and the log intervals in iterations — confirm with the trainer.
train_cfg = dict(
    eval_start=0,
    log_interval=100,
    max_epochs=16,
    save_checkpoint_interval=16,
    train_log_img_interval=500,
    val_interval=2,
    val_log_img_interval=50,
    val_type='epoch_base')
# Training loader over ./data/u4k/splits/train.txt, batch size 4.
# network_process_size [392, 518] matches the img_size / patch_process_shape
# values used in the model entry of this file. Random cropping is enabled
# with a (540, 960) crop size.
train_dataloader = dict(
    batch_size=4,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='train',
        resize_mode='depth-anything',
        split='./data/u4k/splits/train.txt',
        transform_cfg=dict(
            degree=1.0,
            network_process_size=[392, 518],
            random_crop=True,
            random_crop_size=(540, 960)),
        type='UnrealStereo4kDataset'),
    num_workers=4)
# Validation loader over ./data/u4k/splits/val.txt, batch size 1, in
# 'infer' mode. Uses the same resize_mode and network_process_size as
# train_dataloader; a random_crop_size is listed but no random_crop flag is
# set in this entry.
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        resize_mode='depth-anything',
        split='./data/u4k/splits/val.txt',
        transform_cfg=dict(
            degree=1.0,
            network_process_size=[392, 518],
            random_crop_size=(540, 960)),
        type='UnrealStereo4kDataset'),
    num_workers=2)
# Working directory for this run. It sits beside the coarse_pretrain and
# fine_pretrain directories referenced by model['pretrain_model'].
work_dir = './work_dir/depthanything_vitb_u4k/patchfusion'
# Stand-alone 'DA-ZoeDepth' settings.
# The values here are the same as those written out under
# model['coarse_branch'] and model['fine_branch'] in this file; the three
# copies are independent literals, so an edit to one does not propagate.
# NOTE(review): dataset='nyu' while the dataloaders in this file point at
# ./data/u4k — confirm whether this key is read.
zoe_depth_config = dict(
    attractor_alpha=1000,
    attractor_gamma=2,
    attractor_kind='mean',
    attractor_type='inv',
    aug=True,
    bin_centers_type='softplus',
    bin_embedding_dim=128,
    clip_grad=0.1,
    dataset='nyu',
    depth_anything=True,
    distributed=True,
    do_resize=False,
    force_keep_ar=True,
    freeze_midas_bn=True,
    gpu='NULL',
    img_size=[392, 518],
    inverse_midas=False,
    log_images_every=0.1,
    max_depth=80,
    max_temp=50.0,
    max_translation=100,
    memory_efficient=True,
    midas_model_type='vitb',
    min_depth=0.001,
    min_temp=0.0212,
    model='zoedepth',
    n_attractors=[16, 8, 4, 1],
    n_bins=64,
    name='ZoeDepth',
    notes='',
    output_distribution='logbinomial',
    prefetch=False,
    pretrained_resource='local::./work_dir/DepthAnything_vitb.pt',
    print_losses=False,
    project='ZoeDepth',
    random_crop=False,
    random_translate=False,
    root='.',
    save_dir='',
    shared_dict='NULL',
    tags='',
    train_midas=True,
    translate_prob=0.2,
    type='DA-ZoeDepth',
    uid='NULL',
    use_amp=False,
    use_pretrained_midas=True,
    use_shared_dict=False,
    validate_every=0.25,
    version_name='v1',
    workers=16)