# Experiment configuration for the 'fine_pretrain' run (see `log_name`) of
# project 'patchfusion'.
#
# The module is a flat list of top-level assignments kept in alphabetical
# order. Every value is a plain literal, so the file needs no imports and has
# no side effects when executed.
#
# NOTE(review): this looks like an auto-generated, fully expanded config dump
# (presumably mmengine-style) -- confirm before hand-editing, and prefer
# changing the source config it was generated from.

# NOTE(review): presumably the batch keys forwarded to the model -- confirm
# against the training loop.
collect_input_args = [
    'image_lr',
    'crops_image_hr',
    'depth_gt',
    'crop_depths',
    'bboxs',
    'image_hr',
]
convert_syncbn = True
debug = False
env_cfg = dict(
    cudnn_benchmark=True,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='forkserver'))
find_unused_parameters = True
# Generic image-folder loader; dataset_name / rgb_image_dir are empty strings
# and gt_dir is None here, i.e. placeholders with no concrete data location.
general_dataloader = dict(
    batch_size=1,
    dataset=dict(
        dataset_name='', gt_dir=None, rgb_image_dir='', type='ImageDataset'),
    num_workers=2)
launcher = 'pytorch'
log_name = 'fine_pretrain'
# Depth bounds; the same two values are repeated inside `model`, both branch
# configs, every dataset dict and `zoe_depth_config`.
max_depth = 80
min_depth = 0.001
# `coarse_branch` and `fine_branch` are written out in full. They are
# identical to each other and to `zoe_depth_config` at the bottom of this
# file -- keep all three copies in sync when editing.
model = dict(
    coarse_branch=dict(
        attractor_alpha=1000,
        attractor_gamma=2,
        attractor_kind='mean',
        attractor_type='inv',
        aug=True,
        bin_centers_type='softplus',
        bin_embedding_dim=128,
        clip_grad=0.1,
        dataset='nyu',
        depth_anything=True,
        distributed=True,
        do_resize=False,
        force_keep_ar=True,
        freeze_midas_bn=True,
        gpu='NULL',
        img_size=[
            392,
            518,
        ],
        inverse_midas=False,
        log_images_every=0.1,
        max_depth=80,
        max_temp=50.0,
        max_translation=100,
        memory_efficient=True,
        midas_model_type='vitb',
        min_depth=0.001,
        min_temp=0.0212,
        model='zoedepth',
        n_attractors=[
            16,
            8,
            4,
            1,
        ],
        n_bins=64,
        name='ZoeDepth',
        notes='',
        output_distribution='logbinomial',
        prefetch=False,
        pretrained_resource='local::./work_dir/DepthAnything_vitb.pt',
        print_losses=False,
        project='ZoeDepth',
        random_crop=False,
        random_translate=False,
        root='.',
        save_dir='',
        shared_dict='NULL',
        tags='',
        train_midas=True,
        translate_prob=0.2,
        type='DA-ZoeDepth',
        uid='NULL',
        use_amp=False,
        use_pretrained_midas=True,
        use_shared_dict=False,
        validate_every=0.25,
        version_name='v1',
        workers=16),
    fine_branch=dict(
        attractor_alpha=1000,
        attractor_gamma=2,
        attractor_kind='mean',
        attractor_type='inv',
        aug=True,
        bin_centers_type='softplus',
        bin_embedding_dim=128,
        clip_grad=0.1,
        dataset='nyu',
        depth_anything=True,
        distributed=True,
        do_resize=False,
        force_keep_ar=True,
        freeze_midas_bn=True,
        gpu='NULL',
        img_size=[
            392,
            518,
        ],
        inverse_midas=False,
        log_images_every=0.1,
        max_depth=80,
        max_temp=50.0,
        max_translation=100,
        memory_efficient=True,
        midas_model_type='vitb',
        min_depth=0.001,
        min_temp=0.0212,
        model='zoedepth',
        n_attractors=[
            16,
            8,
            4,
            1,
        ],
        n_bins=64,
        name='ZoeDepth',
        notes='',
        output_distribution='logbinomial',
        prefetch=False,
        pretrained_resource='local::./work_dir/DepthAnything_vitb.pt',
        print_losses=False,
        project='ZoeDepth',
        random_crop=False,
        random_translate=False,
        root='.',
        save_dir='',
        shared_dict='NULL',
        tags='',
        train_midas=True,
        translate_prob=0.2,
        type='DA-ZoeDepth',
        uid='NULL',
        use_amp=False,
        use_pretrained_midas=True,
        use_shared_dict=False,
        validate_every=0.25,
        version_name='v1',
        workers=16),
    max_depth=80,
    min_depth=0.001,
    # Same (392, 518) pair as `img_size` in both branches, but a tuple here.
    patch_process_shape=(
        392,
        518,
    ),
    sigloss=dict(type='SILogLoss'),
    target='fine',
    type='BaselinePretrain')
# Gradient-norm clipping uses max_norm=0.1, the same number as `clip_grad` in
# the branch configs.
optim_wrapper = dict(
    clip_grad=dict(max_norm=0.1, norm_type=2, type='norm'),
    optimizer=dict(lr=4e-06, type='AdamW', weight_decay=0.01),
    paramwise_cfg=dict(bypass_duplicate=True, custom_keys=dict()))
# NOTE(review): no `type` key is given; the key names match the arguments of
# torch.optim.lr_scheduler.OneCycleLR -- confirm which scheduler the consumer
# builds from this dict.
param_scheduler = dict(
    base_momentum=0.85,
    cycle_momentum=True,
    div_factor=1,
    final_div_factor=10000,
    max_momentum=0.95,
    pct_start=0.5,
    three_phase=False)
project = 'patchfusion'
resume = False
tags = [
    'fine',
    'da',
    'vitb',
]
# NOTE(review): both test loaders use network_process_size [384, 512] and set
# no `resize_mode`, whereas the train/val loaders use [392, 518] with
# resize_mode='depth-anything' -- confirm this difference is intended.
test_in_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        split='./data/u4k/splits/test.txt',
        transform_cfg=dict(network_process_size=[
            384,
            512,
        ]),
        type='UnrealStereo4kDataset'),
    num_workers=2)
# Identical to `test_in_dataloader` except for the split file (test_out.txt).
test_out_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        split='./data/u4k/splits/test_out.txt',
        transform_cfg=dict(network_process_size=[
            384,
            512,
        ]),
        type='UnrealStereo4kDataset'),
    num_workers=2)
# save_checkpoint_interval equals max_epochs (both 24).
# NOTE(review): presumably this yields a single checkpoint at the end of
# training -- confirm against the trainer.
train_cfg = dict(
    eval_start=0,
    log_interval=100,
    max_epochs=24,
    save_checkpoint_interval=24,
    train_log_img_interval=500,
    val_interval=2,
    val_log_img_interval=50,
    val_type='epoch_base')
train_dataloader = dict(
    batch_size=4,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='train',
        resize_mode='depth-anything',
        split='./data/u4k/splits/train.txt',
        transform_cfg=dict(
            degree=1.0,
            network_process_size=[
                392,
                518,
            ],
            random_crop=True,
            random_crop_size=(
                540,
                960,
            )),
        type='UnrealStereo4kDataset'),
    num_workers=4)
# Same transform settings as the training loader, except that there is no
# `random_crop` key here (random_crop_size is still present) and mode is
# 'infer'.
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        resize_mode='depth-anything',
        split='./data/u4k/splits/val.txt',
        transform_cfg=dict(
            degree=1.0,
            network_process_size=[
                392,
                518,
            ],
            random_crop_size=(
                540,
                960,
            )),
        type='UnrealStereo4kDataset'),
    num_workers=2)
work_dir = './work_dir/depthanything_vitb_u4k/fine_pretrain'
# Stand-alone copy of the branch settings; equal, key for key, to
# model['coarse_branch'] and model['fine_branch'] above.
zoe_depth_config = dict(
    attractor_alpha=1000,
    attractor_gamma=2,
    attractor_kind='mean',
    attractor_type='inv',
    aug=True,
    bin_centers_type='softplus',
    bin_embedding_dim=128,
    clip_grad=0.1,
    dataset='nyu',
    depth_anything=True,
    distributed=True,
    do_resize=False,
    force_keep_ar=True,
    freeze_midas_bn=True,
    gpu='NULL',
    img_size=[
        392,
        518,
    ],
    inverse_midas=False,
    log_images_every=0.1,
    max_depth=80,
    max_temp=50.0,
    max_translation=100,
    memory_efficient=True,
    midas_model_type='vitb',
    min_depth=0.001,
    min_temp=0.0212,
    model='zoedepth',
    n_attractors=[
        16,
        8,
        4,
        1,
    ],
    n_bins=64,
    name='ZoeDepth',
    notes='',
    output_distribution='logbinomial',
    prefetch=False,
    pretrained_resource='local::./work_dir/DepthAnything_vitb.pt',
    print_losses=False,
    project='ZoeDepth',
    random_crop=False,
    random_translate=False,
    root='.',
    save_dir='',
    shared_dict='NULL',
    tags='',
    train_midas=True,
    translate_prob=0.2,
    type='DA-ZoeDepth',
    uid='NULL',
    use_amp=False,
    use_pretrained_midas=True,
    use_shared_dict=False,
    validate_every=0.25,
    version_name='v1',
    workers=16)