Spaces:
Configuration error
Configuration error
| # runtime | |
| max_epochs = 270 | |
| stage2_num_epochs = 30 | |
| base_lr = 4e-3 | |
| train_cfg = dict(max_epochs=max_epochs, val_interval=10) | |
| randomness = dict(seed=21) | |
| # optimizer | |
| optim_wrapper = dict( | |
| type='OptimWrapper', | |
| optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), | |
| paramwise_cfg=dict( | |
| norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) | |
| # learning rate | |
| param_scheduler = [ | |
| dict( | |
| type='LinearLR', | |
| start_factor=1.0e-5, | |
| by_epoch=False, | |
| begin=0, | |
| end=1000), | |
| dict( | |
| # use cosine lr from 150 to 300 epoch | |
| type='CosineAnnealingLR', | |
| eta_min=base_lr * 0.05, | |
| begin=max_epochs // 2, | |
| end=max_epochs, | |
| T_max=max_epochs // 2, | |
| by_epoch=True, | |
| convert_to_iter_based=True), | |
| ] | |
| # automatically scaling LR based on the actual training batch size | |
| auto_scale_lr = dict(base_batch_size=512) | |
| # codec settings | |
| codec = dict( | |
| type='SimCCLabel', | |
| input_size=(288, 384), | |
| sigma=(6., 6.93), | |
| simcc_split_ratio=2.0, | |
| normalize=False, | |
| use_dark=False) | |
| # model settings | |
| model = dict( | |
| type='TopdownPoseEstimator', | |
| data_preprocessor=dict( | |
| type='PoseDataPreprocessor', | |
| mean=[123.675, 116.28, 103.53], | |
| std=[58.395, 57.12, 57.375], | |
| bgr_to_rgb=True), | |
| backbone=dict( | |
| _scope_='mmdet', | |
| type='CSPNeXt', | |
| arch='P5', | |
| expand_ratio=0.5, | |
| deepen_factor=1., | |
| widen_factor=1., | |
| out_indices=(4, ), | |
| channel_attention=True, | |
| norm_cfg=dict(type='SyncBN'), | |
| act_cfg=dict(type='SiLU'), | |
| init_cfg=dict( | |
| type='Pretrained', | |
| prefix='backbone.', | |
| checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' | |
| 'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa | |
| )), | |
| head=dict( | |
| type='RTMCCHead', | |
| in_channels=1024, | |
| out_channels=133, | |
| input_size=codec['input_size'], | |
| in_featuremap_size=(9, 12), | |
| simcc_split_ratio=codec['simcc_split_ratio'], | |
| final_layer_kernel_size=7, | |
| gau_cfg=dict( | |
| hidden_dims=256, | |
| s=128, | |
| expansion_factor=2, | |
| dropout_rate=0., | |
| drop_path=0., | |
| act_fn='SiLU', | |
| use_rel_bias=False, | |
| pos_enc=False), | |
| loss=dict( | |
| type='KLDiscretLoss', | |
| use_target_weight=True, | |
| beta=10., | |
| label_softmax=True), | |
| decoder=codec), | |
| test_cfg=dict(flip_test=True, )) | |
| # base dataset settings | |
| dataset_type = 'CocoWholeBodyDataset' | |
| data_mode = 'topdown' | |
| data_root = '/data/' | |
| backend_args = dict(backend='local') | |
| # backend_args = dict( | |
| # backend='petrel', | |
| # path_mapping=dict({ | |
| # f'{data_root}': 's3://openmmlab/datasets/detection/coco/', | |
| # f'{data_root}': 's3://openmmlab/datasets/detection/coco/' | |
| # })) | |
| # pipelines | |
| train_pipeline = [ | |
| dict(type='LoadImage', backend_args=backend_args), | |
| dict(type='GetBBoxCenterScale'), | |
| dict(type='RandomFlip', direction='horizontal'), | |
| dict(type='RandomHalfBody'), | |
| dict( | |
| type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), | |
| dict(type='TopdownAffine', input_size=codec['input_size']), | |
| dict(type='mmdet.YOLOXHSVRandomAug'), | |
| dict( | |
| type='Albumentation', | |
| transforms=[ | |
| dict(type='Blur', p=0.1), | |
| dict(type='MedianBlur', p=0.1), | |
| dict( | |
| type='CoarseDropout', | |
| max_holes=1, | |
| max_height=0.4, | |
| max_width=0.4, | |
| min_holes=1, | |
| min_height=0.2, | |
| min_width=0.2, | |
| p=1.0), | |
| ]), | |
| dict(type='GenerateTarget', encoder=codec), | |
| dict(type='PackPoseInputs') | |
| ] | |
| val_pipeline = [ | |
| dict(type='LoadImage', backend_args=backend_args), | |
| dict(type='GetBBoxCenterScale'), | |
| dict(type='TopdownAffine', input_size=codec['input_size']), | |
| dict(type='PackPoseInputs') | |
| ] | |
| train_pipeline_stage2 = [ | |
| dict(type='LoadImage', backend_args=backend_args), | |
| dict(type='GetBBoxCenterScale'), | |
| dict(type='RandomFlip', direction='horizontal'), | |
| dict(type='RandomHalfBody'), | |
| dict( | |
| type='RandomBBoxTransform', | |
| shift_factor=0., | |
| scale_factor=[0.75, 1.25], | |
| rotate_factor=60), | |
| dict(type='TopdownAffine', input_size=codec['input_size']), | |
| dict(type='mmdet.YOLOXHSVRandomAug'), | |
| dict( | |
| type='Albumentation', | |
| transforms=[ | |
| dict(type='Blur', p=0.1), | |
| dict(type='MedianBlur', p=0.1), | |
| dict( | |
| type='CoarseDropout', | |
| max_holes=1, | |
| max_height=0.4, | |
| max_width=0.4, | |
| min_holes=1, | |
| min_height=0.2, | |
| min_width=0.2, | |
| p=0.5), | |
| ]), | |
| dict(type='GenerateTarget', encoder=codec), | |
| dict(type='PackPoseInputs') | |
| ] | |
| datasets = [] | |
| dataset_coco=dict( | |
| type=dataset_type, | |
| data_root=data_root, | |
| data_mode=data_mode, | |
| ann_file='coco/annotations/coco_wholebody_train_v1.0.json', | |
| data_prefix=dict(img='coco/train2017/'), | |
| pipeline=[], | |
| ) | |
| datasets.append(dataset_coco) | |
| scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class', | |
| 'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow', | |
| 'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference'] | |
| for i in range(len(scene)): | |
| datasets.append( | |
| dict( | |
| type=dataset_type, | |
| data_root=data_root, | |
| data_mode=data_mode, | |
| ann_file='UBody/annotations/'+scene[i]+'/keypoint_annotation.json', | |
| data_prefix=dict(img='UBody/images/'+scene[i]+'/'), | |
| pipeline=[], | |
| ) | |
| ) | |
| # data loaders | |
| train_dataloader = dict( | |
| batch_size=32, | |
| num_workers=10, | |
| persistent_workers=True, | |
| sampler=dict(type='DefaultSampler', shuffle=True), | |
| dataset=dict( | |
| type='CombinedDataset', | |
| metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'), | |
| datasets=datasets, | |
| pipeline=train_pipeline, | |
| test_mode=False, | |
| )) | |
| val_dataloader = dict( | |
| batch_size=32, | |
| num_workers=10, | |
| persistent_workers=True, | |
| drop_last=False, | |
| sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), | |
| dataset=dict( | |
| type=dataset_type, | |
| data_root=data_root, | |
| data_mode=data_mode, | |
| ann_file='coco/annotations/coco_wholebody_val_v1.0.json', | |
| bbox_file=f'{data_root}coco/person_detection_results/' | |
| 'COCO_val2017_detections_AP_H_56_person.json', | |
| data_prefix=dict(img='coco/val2017/'), | |
| test_mode=True, | |
| pipeline=val_pipeline, | |
| )) | |
| test_dataloader = val_dataloader | |
| # hooks | |
| default_hooks = dict( | |
| checkpoint=dict( | |
| save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1)) | |
| custom_hooks = [ | |
| dict( | |
| type='EMAHook', | |
| ema_type='ExpMomentumEMA', | |
| momentum=0.0002, | |
| update_buffers=True, | |
| priority=49), | |
| dict( | |
| type='mmdet.PipelineSwitchHook', | |
| switch_epoch=max_epochs - stage2_num_epochs, | |
| switch_pipeline=train_pipeline_stage2) | |
| ] | |
| # evaluators | |
| val_evaluator = dict( | |
| type='CocoWholeBodyMetric', | |
| ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json') | |
| test_evaluator = val_evaluator |