fparodi committed on
Commit
4fade59
·
verified ·
1 Parent(s): 8ebc26d

Upload pose/vitpose_base_68kpt_config.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. pose/vitpose_base_68kpt_config.py +403 -0
pose/vitpose_base_68kpt_config.py ADDED
@@ -0,0 +1,403 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Default scope name; NOTE(review): presumably the registry scope used to
# resolve the `type` strings in this file -- confirm against mmengine.
default_scope = 'mmpose'

# Hooks keyed by role. The checkpoint hook saves at interval 10, tracks the
# best checkpoint by 'NME' with rule 'less', and keeps at most 2 checkpoints.
# The visualization hook is present but disabled.
default_hooks = {
    'timer': {'type': 'IterTimerHook'},
    'logger': {'type': 'LoggerHook', 'interval': 50},
    'param_scheduler': {'type': 'ParamSchedulerHook'},
    'checkpoint': {
        'type': 'CheckpointHook',
        'interval': 10,
        'save_best': 'NME',
        'rule': 'less',
        'max_keep_ckpts': 2,
    },
    'sampler_seed': {'type': 'DistSamplerSeedHook'},
    'visualization': {'type': 'PoseVisualizationHook', 'enable': False},
}

# Extra hooks registered in addition to the defaults above.
custom_hooks = [{'type': 'SyncBuffersHook'}]
15
# Environment settings: cuDNN benchmark off, 'fork' start method for worker
# processes, OpenCV threads set to 0, and 'nccl' as the distributed backend.
env_cfg = {
    'cudnn_benchmark': False,
    'mp_cfg': {'mp_start_method': 'fork', 'opencv_num_threads': 0},
    'dist_cfg': {'backend': 'nccl'},
}

# Visualizer with a single local backend. The backend list is spelled out
# again inside `visualizer` rather than referencing `vis_backends`.
vis_backends = [{'type': 'LocalVisBackend'}]
visualizer = {
    'type': 'PoseLocalVisualizer',
    'vis_backends': [{'type': 'LocalVisBackend'}],
    'name': 'visualizer',
}

# Logging: epoch-based log processor with a window of 50 and 6 digits.
log_processor = {
    'type': 'LogProcessor',
    'window_size': 50,
    'by_epoch': True,
    'num_digits': 6,
}
log_level = 'INFO'

# No checkpoint is loaded and training is not resumed; the previous
# machine-specific `load_from` value is kept below for reference only.
# load_from = 'Y:\\MacFace\\results\\coco_whface\\best_NME_epoch_20.pth'
resume = False
backend_args = {'backend': 'local'}

# Epoch-based training for 210 epochs with val_interval=1; the validation
# and test loop configs are empty.
train_cfg = {'by_epoch': True, 'max_epochs': 210, 'val_interval': 1}
val_cfg = {}
test_cfg = {}
33
# Keypoint metadata for the 68-point face layout ('coco_wholebody_face').
#
# * `keypoint_info` maps index -> dict(name, id, color, type, swap). `id`
#   equals the index for every entry, and `swap` names the keypoint's
#   counterpart (empty string when it has none). Every swap is mutual.
# * `joint_weights` and `sigmas` each hold one value per keypoint (68).
#
# Fix: entry 51 ('face-51') previously declared id=52, duplicating the id
# of 'face-52'; it now declares id=51 like every other entry (id == index).
dataset_info = dict(
    dataset_name='coco_wholebody_face',
    paper_info=dict(
        author=
        'Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping',
        title='Whole-Body Human Pose Estimation in the Wild',
        container=
        'Proceedings of the European Conference on Computer Vision (ECCV)',
        year='2020',
        homepage='https://github.com/jin-s13/COCO-WholeBody/'),
    keypoint_info=dict({
        # 0-16: swap pairs i <-> 16 - i; 8 has no counterpart.
        0: dict(name='face-0', id=0, color=[255, 0, 0], type='', swap='face-16'),
        1: dict(name='face-1', id=1, color=[255, 0, 0], type='', swap='face-15'),
        2: dict(name='face-2', id=2, color=[255, 0, 0], type='', swap='face-14'),
        3: dict(name='face-3', id=3, color=[255, 0, 0], type='', swap='face-13'),
        4: dict(name='face-4', id=4, color=[255, 0, 0], type='', swap='face-12'),
        5: dict(name='face-5', id=5, color=[255, 0, 0], type='', swap='face-11'),
        6: dict(name='face-6', id=6, color=[255, 0, 0], type='', swap='face-10'),
        7: dict(name='face-7', id=7, color=[255, 0, 0], type='', swap='face-9'),
        8: dict(name='face-8', id=8, color=[255, 0, 0], type='', swap=''),
        9: dict(name='face-9', id=9, color=[255, 0, 0], type='', swap='face-7'),
        10: dict(name='face-10', id=10, color=[255, 0, 0], type='', swap='face-6'),
        11: dict(name='face-11', id=11, color=[255, 0, 0], type='', swap='face-5'),
        12: dict(name='face-12', id=12, color=[255, 0, 0], type='', swap='face-4'),
        13: dict(name='face-13', id=13, color=[255, 0, 0], type='', swap='face-3'),
        14: dict(name='face-14', id=14, color=[255, 0, 0], type='', swap='face-2'),
        15: dict(name='face-15', id=15, color=[255, 0, 0], type='', swap='face-1'),
        16: dict(name='face-16', id=16, color=[255, 0, 0], type='', swap='face-0'),
        # 17-26: swap pairs i <-> 43 - i.
        17: dict(name='face-17', id=17, color=[255, 0, 0], type='', swap='face-26'),
        18: dict(name='face-18', id=18, color=[255, 0, 0], type='', swap='face-25'),
        19: dict(name='face-19', id=19, color=[255, 0, 0], type='', swap='face-24'),
        20: dict(name='face-20', id=20, color=[255, 0, 0], type='', swap='face-23'),
        21: dict(name='face-21', id=21, color=[255, 0, 0], type='', swap='face-22'),
        22: dict(name='face-22', id=22, color=[255, 0, 0], type='', swap='face-21'),
        23: dict(name='face-23', id=23, color=[255, 0, 0], type='', swap='face-20'),
        24: dict(name='face-24', id=24, color=[255, 0, 0], type='', swap='face-19'),
        25: dict(name='face-25', id=25, color=[255, 0, 0], type='', swap='face-18'),
        26: dict(name='face-26', id=26, color=[255, 0, 0], type='', swap='face-17'),
        # 27-30: no counterparts.
        27: dict(name='face-27', id=27, color=[255, 0, 0], type='', swap=''),
        28: dict(name='face-28', id=28, color=[255, 0, 0], type='', swap=''),
        29: dict(name='face-29', id=29, color=[255, 0, 0], type='', swap=''),
        30: dict(name='face-30', id=30, color=[255, 0, 0], type='', swap=''),
        # 31-35: swap pairs i <-> 66 - i; 33 has no counterpart.
        31: dict(name='face-31', id=31, color=[255, 0, 0], type='', swap='face-35'),
        32: dict(name='face-32', id=32, color=[255, 0, 0], type='', swap='face-34'),
        33: dict(name='face-33', id=33, color=[255, 0, 0], type='', swap=''),
        34: dict(name='face-34', id=34, color=[255, 0, 0], type='', swap='face-32'),
        35: dict(name='face-35', id=35, color=[255, 0, 0], type='', swap='face-31'),
        # 36-47: two six-point groups swapped with each other (irregular
        # pairing: 36<->45, 37<->44, 38<->43, 39<->42, 40<->47, 41<->46).
        36: dict(name='face-36', id=36, color=[255, 0, 0], type='', swap='face-45'),
        37: dict(name='face-37', id=37, color=[255, 0, 0], type='', swap='face-44'),
        38: dict(name='face-38', id=38, color=[255, 0, 0], type='', swap='face-43'),
        39: dict(name='face-39', id=39, color=[255, 0, 0], type='', swap='face-42'),
        40: dict(name='face-40', id=40, color=[255, 0, 0], type='', swap='face-47'),
        41: dict(name='face-41', id=41, color=[255, 0, 0], type='', swap='face-46'),
        42: dict(name='face-42', id=42, color=[255, 0, 0], type='', swap='face-39'),
        43: dict(name='face-43', id=43, color=[255, 0, 0], type='', swap='face-38'),
        44: dict(name='face-44', id=44, color=[255, 0, 0], type='', swap='face-37'),
        45: dict(name='face-45', id=45, color=[255, 0, 0], type='', swap='face-36'),
        46: dict(name='face-46', id=46, color=[255, 0, 0], type='', swap='face-41'),
        47: dict(name='face-47', id=47, color=[255, 0, 0], type='', swap='face-40'),
        # 48-54: swap pairs i <-> 102 - i; 51 has no counterpart.
        48: dict(name='face-48', id=48, color=[255, 0, 0], type='', swap='face-54'),
        49: dict(name='face-49', id=49, color=[255, 0, 0], type='', swap='face-53'),
        50: dict(name='face-50', id=50, color=[255, 0, 0], type='', swap='face-52'),
        51: dict(name='face-51', id=51, color=[255, 0, 0], type='', swap=''),
        52: dict(name='face-52', id=52, color=[255, 0, 0], type='', swap='face-50'),
        53: dict(name='face-53', id=53, color=[255, 0, 0], type='', swap='face-49'),
        54: dict(name='face-54', id=54, color=[255, 0, 0], type='', swap='face-48'),
        # 55-59: swap pairs i <-> 114 - i; 57 has no counterpart.
        55: dict(name='face-55', id=55, color=[255, 0, 0], type='', swap='face-59'),
        56: dict(name='face-56', id=56, color=[255, 0, 0], type='', swap='face-58'),
        57: dict(name='face-57', id=57, color=[255, 0, 0], type='', swap=''),
        58: dict(name='face-58', id=58, color=[255, 0, 0], type='', swap='face-56'),
        59: dict(name='face-59', id=59, color=[255, 0, 0], type='', swap='face-55'),
        # 60-64: swap pairs i <-> 124 - i; 62 has no counterpart.
        60: dict(name='face-60', id=60, color=[255, 0, 0], type='', swap='face-64'),
        61: dict(name='face-61', id=61, color=[255, 0, 0], type='', swap='face-63'),
        62: dict(name='face-62', id=62, color=[255, 0, 0], type='', swap=''),
        63: dict(name='face-63', id=63, color=[255, 0, 0], type='', swap='face-61'),
        64: dict(name='face-64', id=64, color=[255, 0, 0], type='', swap='face-60'),
        # 65-67: 65 <-> 67; 66 has no counterpart.
        65: dict(name='face-65', id=65, color=[255, 0, 0], type='', swap='face-67'),
        66: dict(name='face-66', id=66, color=[255, 0, 0], type='', swap=''),
        67: dict(name='face-67', id=67, color=[255, 0, 0], type='', swap='face-65')
    }),
    skeleton_info=dict(),
    # Uniform weight of 1.0 for each of the 68 keypoints.
    joint_weights=[1.0] * 68,
    # One sigma per keypoint, in keypoint-index order (68 values).
    sigmas=[
        0.042, 0.043, 0.044, 0.043, 0.04, 0.035, 0.031, 0.025, 0.02, 0.023,
        0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013, 0.012, 0.011,
        0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015, 0.009, 0.007, 0.007,
        0.007, 0.012, 0.009, 0.008, 0.016, 0.01, 0.017, 0.011, 0.009, 0.011,
        0.009, 0.007, 0.013, 0.008, 0.011, 0.012, 0.01, 0.034, 0.008, 0.008,
        0.009, 0.008, 0.008, 0.007, 0.01, 0.008, 0.009, 0.009, 0.009, 0.007,
        0.007, 0.008, 0.011, 0.008, 0.008, 0.008, 0.01, 0.008
    ])
240
# Import the module providing the layer-decay optimizer-wrapper constructor
# named in `optim_wrapper['constructor']` below; a failed import is an error.
custom_imports = dict(
    imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
    allow_failed_imports=False)

# AdamW under an AMP optimizer wrapper with dynamic loss scaling, gradient
# clipping at L2 norm 1.0, and layer-wise decay (24 layers, rate 0.8).
#
# Fix: the `bias` entry of `custom_keys` used the misspelled key
# `decay_multi`; it now uses `decay_mult` like its sibling entries.
# NOTE(review): whether the layer-decay constructor consults `custom_keys`
# at all should be confirmed against the installed mmpose version.
optim_wrapper = dict(
    optimizer=dict(
        type='AdamW', lr=1e-05, betas=(0.9, 0.999), weight_decay=0.1),
    paramwise_cfg=dict(
        num_layers=24,
        layer_decay_rate=0.8,
        custom_keys=dict(
            bias=dict(decay_mult=0.0),
            pos_embed=dict(decay_mult=0.0),
            relative_position_bias_table=dict(decay_mult=0.0),
            norm=dict(decay_mult=0.0))),
    constructor='LayerDecayOptimWrapperConstructor',
    clip_grad=dict(max_norm=1.0, norm_type=2),
    type='AmpOptimWrapper',
    loss_scale='dynamic')
258
+
259
# Learning-rate schedule, two stages:
#   1. LinearLR over 0-500 with by_epoch=False, starting from factor 0.0001.
#   2. MultiStepLR over 0-210 with by_epoch=True, gamma 0.1 at 170 and 200.
param_scheduler = [
    {
        'type': 'LinearLR',
        'begin': 0,
        'end': 500,
        'start_factor': 0.0001,
        'by_epoch': False,
    },
    {
        'type': 'MultiStepLR',
        'begin': 0,
        'end': 210,
        'milestones': [170, 200],
        'gamma': 0.1,
        'by_epoch': True,
    },
]

# Base batch size recorded for automatic learning-rate scaling.
auto_scale_lr = {'base_batch_size': 512}

# UDP heatmap codec: input_size (192, 256), heatmap_size (48, 64), sigma 2.
codec = {
    'type': 'UDPHeatmap',
    'input_size': (192, 256),
    'heatmap_size': (48, 64),
    'sigma': 2,
}
275
+
276
# Top-down pose estimator: a VisionTransformer backbone feeding a heatmap
# head with two deconvolution stages that outputs 68 keypoint channels.
#
# Fix: the pretrained checkpoint path used a Windows-only separator
# (`.\...`); it is now `./...`, which resolves on both Windows and POSIX and
# matches the forward-slash style of `work_dir` in this file.
model = dict(
    type='TopdownPoseEstimator',
    # Per-channel mean/std normalisation, with BGR -> RGB conversion enabled.
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        # Was 'mmcls.VisionTransformer' in an earlier revision of this file.
        type='mmpretrain.VisionTransformer',
        arch='large',
        # (256, 192) here vs. (192, 256) in the codec/pipelines.
        # NOTE(review): presumably (h, w) vs. (w, h) ordering -- confirm.
        img_size=(256, 192),
        patch_size=16,
        qkv_bias=True,
        drop_path_rate=0.5,
        with_cls_token=False,
        out_type='featmap',
        patch_cfg=dict(padding=2),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='./vitpose_cocowbf_pfv1_68kpts.pth')),
    head=dict(
        type='HeatmapHead',
        # NOTE(review): 1024 presumably equals the backbone's output width
        # for arch='large' -- confirm before changing `arch`.
        in_channels=1024,
        out_channels=68,
        deconv_out_channels=(256, 256),
        deconv_kernel_sizes=(4, 4),
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        # Same settings as the top-level `codec`, written out inline.
        decoder=dict(
            type='UDPHeatmap',
            input_size=(192, 256),
            heatmap_size=(48, 64),
            sigma=2)),
    # Flip testing enabled, in 'heatmap' mode, without heatmap shifting.
    test_cfg=dict(flip_test=True, flip_mode='heatmap', shift_heatmap=False))
314
+
315
+
316
# Dataset identifiers. The dataloader dicts in this file spell out their own
# values rather than referencing these names.
data_root = '.'
dataset_type = 'CocoWholeBodyFaceDataset'
data_mode = 'topdown'

# Training transforms, applied in list order: load, bbox centre/scale,
# random horizontal flip, random half-body, random bbox transform, UDP affine
# warp to (192, 256), UDP heatmap target generation, then packing.
train_pipeline = [
    {'type': 'LoadImage'},
    {'type': 'GetBBoxCenterScale'},
    {'type': 'RandomFlip', 'direction': 'horizontal'},
    {'type': 'RandomHalfBody'},
    {'type': 'RandomBBoxTransform'},
    {'type': 'TopdownAffine', 'input_size': (192, 256), 'use_udp': True},
    {
        'type': 'GenerateTarget',
        'encoder': {
            'type': 'UDPHeatmap',
            'input_size': (192, 256),
            'heatmap_size': (48, 64),
            'sigma': 2,
        },
    },
    {'type': 'PackPoseInputs'},
]

# Validation transforms: no random augmentation and no target generation.
val_pipeline = [
    {'type': 'LoadImage'},
    {'type': 'GetBBoxCenterScale'},
    {'type': 'TopdownAffine', 'input_size': (192, 256), 'use_udp': True},
    {'type': 'PackPoseInputs'},
]
341
# Training dataloader: batch size 8, 4 persistent workers, shuffled sampling.
#
# Fixes:
#   * The dataset dict had no `type` key, so it could not be built from the
#     registry; it now names 'CocoWholeBodyFaceDataset', the same value as
#     the module-level `dataset_type`.
#   * `ann_file` used a Windows-only `.\` separator; it is now `./train.json`,
#     which resolves on both Windows and POSIX.
# NOTE(review): no `data_root` / `data_prefix` is set here, so image paths in
# the annotation file are presumably resolvable as written -- confirm.
train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='CocoWholeBodyFaceDataset',
        data_mode='topdown',
        ann_file='./train.json',
        # Same transform sequence as the module-level `train_pipeline`.
        pipeline=[
            dict(type='LoadImage'),
            dict(type='GetBBoxCenterScale'),
            dict(type='RandomFlip', direction='horizontal'),
            dict(type='RandomHalfBody'),
            dict(type='RandomBBoxTransform'),
            dict(type='TopdownAffine', input_size=(192, 256), use_udp=True),
            dict(
                type='GenerateTarget',
                encoder=dict(
                    type='UDPHeatmap',
                    input_size=(192, 256),
                    heatmap_size=(48, 64),
                    sigma=2)),
            dict(type='PackPoseInputs')
        ]))
365
# Validation dataloader: batch size 4, one persistent worker, ordered
# sampling with no round-up, and no dropped final batch.
#
# Fixes:
#   * The dataset dict had no `type` key, so it could not be built from the
#     registry; it now names 'CocoWholeBodyFaceDataset', the same value as
#     the module-level `dataset_type`.
#   * `ann_file` used a Windows-only `.\` separator; it is now `./val.json`,
#     which resolves on both Windows and POSIX.
val_dataloader = dict(
    batch_size=4,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type='CocoWholeBodyFaceDataset',
        data_mode='topdown',
        ann_file='./val.json',
        # No detection-result file is given.
        # NOTE(review): presumably the annotation boxes are used instead --
        # confirm against the dataset class.
        bbox_file=None,
        test_mode=True,
        # Same transform sequence as the module-level `val_pipeline`.
        pipeline=[
            dict(type='LoadImage'),
            dict(type='GetBBoxCenterScale'),
            dict(type='TopdownAffine', input_size=(192, 256), use_udp=True),
            dict(type='PackPoseInputs')
        ]))
382
# Test dataloader: same loader settings as validation (batch size 4, one
# persistent worker, ordered sampling), reading the test annotation file.
#
# Fixes:
#   * The dataset dict had no `type` key, so it could not be built from the
#     registry; it now names 'CocoWholeBodyFaceDataset', the same value as
#     the module-level `dataset_type`.
#   * `ann_file` used a Windows-only `.\` separator; it is now `./test.json`,
#     which resolves on both Windows and POSIX.
test_dataloader = dict(
    batch_size=4,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type='CocoWholeBodyFaceDataset',
        data_mode='topdown',
        ann_file='./test.json',
        # No detection-result file is given.
        # NOTE(review): presumably the annotation boxes are used instead --
        # confirm against the dataset class.
        bbox_file=None,
        test_mode=True,
        # Same transform sequence as the module-level `val_pipeline`.
        pipeline=[
            dict(type='LoadImage'),
            dict(type='GetBBoxCenterScale'),
            dict(type='TopdownAffine', input_size=(192, 256), use_udp=True),
            dict(type='PackPoseInputs')
        ]))
399
# Validation and test both use the 'NME' metric with
# norm_mode='keypoint_distance'; 'NME' is also the `save_best` key of the
# checkpoint hook.
val_evaluator = {'type': 'NME', 'norm_mode': 'keypoint_distance'}
test_evaluator = {'type': 'NME', 'norm_mode': 'keypoint_distance'}

# Optional model wrapper, left disabled:
# model_wrapper_cfg = dict(type='MMFullyShardedDataParallel', cpu_offload=True)

# No launcher is used; outputs go to the current directory.
launcher = 'none'
work_dir = './'