Spaces:
Running
on
Zero
Running
on
Zero
# Dataset root directories (placeholders -- point them at local copies).
# Other settings in this file build paths by plain string concatenation with
# these values, so the trailing '/' matters.
COCO_ROOT = 'path/to/COCO/'
MPII_ROOT = 'path/to/MPII/'
AIC_ROOT = 'path/to/AIC/'
OCHUMAN_ROOT = 'path/to/OCHuman/'

# Training batch size; also used to scale the optimizer learning rate.
BATCH_SIZE = 64

# Short dataset labels, used as metric prefixes and in the best-checkpoint key.
COCO_NAME = 'COCO'
MPII_NAME = 'MPII'
AIC_NAME = 'AIC'
OCHUMAN_NAME = 'OCHuman'
# Inherit the shared runtime defaults.
_base_ = ['../_base_/default_runtime.py']

# resume = True
# Checkpoint to load weights from.
load_from = 'work_dirs/ViTb-multi/epoch_210.pth'

# runtime
train_cfg = {
    'max_epochs': 210,
    'val_interval': 5,
}
# optimizer
# Extra module to import before building the optimizer wrapper. The module
# path suggests it provides the layer-decay constructor named in
# ``optim_wrapper`` (assumption -- confirm against mmpose).
custom_imports = {
    'imports': ['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
    'allow_failed_imports': False,
}
# AdamW with layer-wise learning-rate decay and gradient-norm clipping.
# The learning rate is 5e-4 at a batch size of 64 and scales linearly with
# BATCH_SIZE.
optim_wrapper = dict(
    optimizer=dict(
        type='AdamW',
        lr=5e-4 * BATCH_SIZE / 64,
        betas=(0.9, 0.999),
        weight_decay=0.1),
    paramwise_cfg=dict(
        num_layers=12,
        layer_decay_rate=0.75,
        # Parameters whose name contains one of these keys get no weight
        # decay. The option is spelled ``decay_mult`` (not ``decay_multi``);
        # a misspelled option is not read as a weight-decay multiplier.
        custom_keys={
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.0),
            'relative_position_bias_table': dict(decay_mult=0.0),
            'norm': dict(decay_mult=0.0),
        },
    ),
    constructor='LayerDecayOptimWrapperConstructor',
    clip_grad=dict(max_norm=1., norm_type=2),
)
# learning policy
param_scheduler = [
    # warm-up: iteration-based linear ramp over [0, 500), starting at 0.001x
    {
        'type': 'LinearLR',
        'begin': 0,
        'end': 500,
        'start_factor': 0.001,
        'by_epoch': False,
    },
    # epoch-based step schedule: gamma 0.1 at milestones 170 and 200
    {
        'type': 'MultiStepLR',
        'begin': 0,
        'end': 210,
        'milestones': [170, 200],
        'gamma': 0.1,
        'by_epoch': True,
    },
]
# Reference batch size for automatic learning-rate scaling based on the
# actual training batch size.
auto_scale_lr = {'base_batch_size': 512}
# hooks
# Track the best checkpoint by the "<COCO_NAME>/AP" metric (higher is
# better) and keep at most one checkpoint file.
default_hooks = {
    'checkpoint': {
        'save_best': COCO_NAME + '/AP',
        'rule': 'greater',
        'max_keep_ckpts': 1,
    },
}
# codec settings
# Shared by the head (as decoder) and the training pipeline (as encoder).
codec = {
    'type': 'UDPHeatmap',
    'input_size': (192, 256),
    'heatmap_size': (48, 64),
    'sigma': 2,
}
# model settings
model = {
    'type': 'TopdownPoseEstimator',
    'data_preprocessor': {
        'type': 'PoseDataPreprocessor',
        'mean': [123.675, 116.28, 103.53],
        'std': [58.395, 57.12, 57.375],
        'bgr_to_rgb': True,
    },
    'backbone': {
        'type': 'mmpretrain.VisionTransformer',
        'arch': 'base',
        'img_size': (256, 192),
        'patch_size': 16,
        'qkv_bias': True,
        'drop_path_rate': 0.3,
        'with_cls_token': False,
        'out_type': 'featmap',
        'patch_cfg': {'padding': 2},
        # No backbone-specific initialisation is configured. The pretrained
        # alternative that was previously used is kept for reference:
        # init_cfg=dict(
        #     type='Pretrained',
        #     checkpoint='models/pretrained/mae_pretrain_vit_base_20230913.pth'),
        'init_cfg': None,
    },
    'head': {
        'type': 'HeatmapHead',
        'in_channels': 768,
        # presumably the size of the merged keypoint set -- confirm against
        # the merged dataset metainfo
        'out_channels': 21,
        'deconv_out_channels': (256, 256),
        'deconv_kernel_sizes': (4, 4),
        'loss': {'type': 'KeypointMSELoss', 'use_target_weight': True},
        'decoder': codec,
    },
    'test_cfg': {
        'flip_test': True,
        'flip_mode': 'heatmap',
        'shift_heatmap': False,
    },
}
# pipelines
# Training-time transforms, applied in list order.
train_pipeline = [
    {'type': 'LoadImage'},
    {'type': 'GetBBoxCenterScale'},
    {
        'type': 'MaskBackground',
        'prob': 1.0,
        'continue_on_failure': False,
        'alpha': 0.2,
        'dilate_prob': 0.5,
        'dilate_amount': 0.1,
        'erode_prob': 0.5,
        'erode_amount': 0.5,
    },
    {'type': 'RandomFlip', 'direction': 'horizontal'},
    {'type': 'RandomHalfBody'},
    {'type': 'RandomBBoxTransform'},
    {
        'type': 'TopdownAffine',
        'input_size': codec['input_size'],
        'use_udp': True,
    },
    {'type': 'GenerateTarget', 'encoder': codec},
    {'type': 'PackPoseInputs'},
]
# Validation/test transforms: no random augmentation and no target
# generation, but the background masking step is kept.
val_pipeline = [
    {'type': 'LoadImage'},
    {'type': 'GetBBoxCenterScale'},
    {'type': 'MaskBackground', 'continue_on_failure': False, 'alpha': 0.2},
    {
        'type': 'TopdownAffine',
        'input_size': codec['input_size'],
        'use_udp': True,
    },
    {'type': 'PackPoseInputs'},
]
| # # base dataset settings | |
| # data_root = TRAIN_ROOT | |
| # val_data_root = VAL_ROOT | |
| # dataset_type = 'CocoDataset' | |
| # data_mode = 'topdown' | |
# COCO train split. ``pipeline`` is left empty here; the combined dataset
# that wraps this entry carries the pipeline.
coco_train_dataset = {
    'type': 'CocoDataset',
    'data_root': COCO_ROOT,
    'data_mode': 'topdown',
    'ann_file': 'annotations/person_keypoints_train2017.json',
    'data_prefix': {'img': 'train2017/'},
    'pipeline': [],
    'test_mode': False,
}

# COCO val split, evaluated on detector boxes filtered by score.
coco_val_dataset = {
    'type': 'CocoDataset',
    'data_root': COCO_ROOT,
    'data_mode': 'topdown',
    'ann_file': 'annotations/person_keypoints_val2017.json',
    # NOTE(review): with the placeholder COCO_ROOT (which ends in '/') this
    # concatenation produces a doubled '/' in the path.
    'bbox_file': COCO_ROOT + '/detections/rtmdet-l-ins-mask.json',
    'filter_cfg': {'bbox_score_thr': 0.3},
    'data_prefix': {'img': 'val2017/'},
    'pipeline': [],
    'test_mode': True,
}
# MPII train split (pipeline supplied by the wrapping combined dataset).
mpii_train_dataset = {
    'type': 'MpiiDataset',
    'data_root': MPII_ROOT,
    'data_mode': 'topdown',
    'ann_file': 'annotations/mpii_sam_train.json',
    'data_prefix': {'img': 'images/'},
    'pipeline': [],
    'test_mode': False,
}

# MPII val split.
mpii_val_dataset = {
    'type': 'MpiiDataset',
    'data_root': MPII_ROOT,
    'data_mode': 'topdown',
    'ann_file': 'annotations/mpii_sam_val.json',
    'data_prefix': {'img': 'images/'},
    'pipeline': [],
    'test_mode': True,
}
# AIC train split (pipeline supplied by the wrapping combined dataset).
aic_train_dataset = {
    'type': 'AicDataset',
    'data_root': AIC_ROOT,
    'data_mode': 'topdown',
    'ann_file': 'annotations/aic_sam_train.json',
    'data_prefix': {'img': 'images/'},
    'pipeline': [],
    'test_mode': False,
}

# AIC val split.
aic_val_dataset = {
    'type': 'AicDataset',
    'data_root': AIC_ROOT,
    'data_mode': 'topdown',
    'ann_file': 'annotations/aic_sam_val.json',
    'data_prefix': {'img': 'images/'},
    'pipeline': [],
    'test_mode': True,
}
# OCHuman val split; validation only, there is no OCHuman training entry.
ochuman_val_dataset = {
    'type': 'OCHumanDataset',
    'data_root': OCHUMAN_ROOT,
    'data_mode': 'topdown',
    'ann_file': 'annotations/person_keypoints_val2017.json',
    'data_prefix': {'img': 'val2017/'},
    # Detector boxes are currently disabled for this split:
    # bbox_file=OCHUMAN_ROOT + "/detections/rtmdet-l-ins.json",
    # filter_cfg=dict(bbox_score_thr=0.3),
    'pipeline': [],
    'test_mode': True,
}
# Validation set: COCO, MPII, AIC and OCHuman behind one merged keypoint
# layout. ``keypoints_mapping`` has one entry per dataset, in the same order
# as ``datasets``; each maps a source keypoint index to a merged index.
combined_val_dataset = {
    'type': 'CombinedDataset',
    'metainfo': {
        'from_file': 'configs/_base_/datasets/merged_COCO_AIC_MPII.py',
    },
    'datasets': [
        coco_val_dataset,
        mpii_val_dataset,
        aic_val_dataset,
        ochuman_val_dataset,
    ],
    'pipeline': val_pipeline,
    'test_mode': True,
    'keypoints_mapping': [
        # Identity mapping for COCO as merged is based on COCO
        {idx: idx for idx in range(17)},
        # MPII -> COCO and additional points
        {
            0: 16, 1: 14, 2: 12, 3: 11, 4: 13, 5: 15, 6: 20, 7: 17,
            8: 18, 9: 19, 10: 10, 11: 8, 12: 6, 13: 5, 14: 7, 15: 9,
        },
        # AIC -> COCO and additional points
        {
            0: 6, 1: 8, 2: 10, 3: 5, 4: 7, 5: 9, 6: 12, 7: 14,
            8: 16, 9: 11, 10: 13, 11: 15, 12: 19, 13: 17,
        },
        # Identity mapping for OCHuman as merged is based on COCO
        {idx: idx for idx in range(17)},
    ],
}
# Training set: COCO, MPII and AIC behind one merged keypoint layout.
# ``keypoints_mapping`` has one entry per dataset, in the same order as
# ``datasets``; each maps a source keypoint index to a merged index.
combined_train_dataset = {
    'type': 'CombinedDataset',
    'metainfo': {
        'from_file': 'configs/_base_/datasets/merged_COCO_AIC_MPII.py',
    },
    'datasets': [
        coco_train_dataset,
        mpii_train_dataset,
        aic_train_dataset,
    ],
    'pipeline': train_pipeline,
    'test_mode': False,
    'keypoints_mapping': [
        # Identity mapping for COCO as merged is based on COCO
        {idx: idx for idx in range(17)},
        # MPII -> COCO and additional points
        {
            0: 16, 1: 14, 2: 12, 3: 11, 4: 13, 5: 15, 6: 20, 7: 17,
            8: 18, 9: 19, 10: 10, 11: 8, 12: 6, 13: 5, 14: 7, 15: 9,
        },
        # AIC -> COCO and additional points
        {
            0: 6, 1: 8, 2: 10, 3: 5, 4: 7, 5: 9, 6: 12, 7: 14,
            8: 16, 9: 11, 10: 13, 11: 15, 12: 19, 13: 17,
        },
    ],
}
# data loaders
# The sampler draws from the three training sources with ratio 1:1:1; its
# batch size is kept equal to the loader's.
train_dataloader = {
    'batch_size': BATCH_SIZE,
    'num_workers': 8,
    'persistent_workers': True,
    'sampler': {
        'type': 'MultiSourceSampler',
        'batch_size': BATCH_SIZE,
        'source_ratio': [1, 1, 1],
        'shuffle': True,
    },
    'dataset': combined_train_dataset,
}
# Validation loader: fixed order, no shuffling, no dropped samples.
val_dataloader = {
    'batch_size': 128,
    'num_workers': 8,
    'persistent_workers': True,
    'drop_last': False,
    'sampler': {
        'type': 'DefaultSampler',
        'shuffle': False,
        'round_up': False,
    },
    'dataset': combined_val_dataset,
}

# Testing reuses the validation loader unchanged.
test_dataloader = val_dataloader
# evaluators
# One metric per validation dataset, listed in the same order as
# ``combined_val_dataset['datasets']``.
val_evaluator = {
    'type': 'MultiDatasetEvaluator',
    'metrics': [
        {
            'type': 'CocoMetric',
            'ann_file': COCO_ROOT + 'annotations/person_keypoints_val2017.json',
            'prefix': COCO_NAME,
            'nms_mode': 'none',
            'outfile_prefix': 'COCO_MaskPose',
            'ignore_stats': [
                'AP .5', 'AP .75', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)',
            ],
        },
        {'type': 'PCKAccuracy', 'prefix': MPII_NAME},
        {'type': 'PCKAccuracy', 'prefix': AIC_NAME},
        {
            'type': 'CocoMetric',
            'ann_file': OCHUMAN_ROOT + 'annotations/person_keypoints_val2017.json',
            'prefix': OCHUMAN_NAME,
            'outfile_prefix': 'ochuman',
            'nms_mode': 'none',
            'ignore_stats': [
                'AP .5', 'AP .75', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)',
            ],
        },
    ],
    'datasets': combined_val_dataset['datasets'],
}

# Testing reuses the validation evaluator unchanged.
test_evaluator = val_evaluator