Phoebux committed on
Commit
bbcd1ca
·
verified ·
1 Parent(s): c816d92

Upload folder using huggingface_hub

Browse files
dwpose_tools/models/.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth filter=lfs diff=lfs merge=lfs -text
dwpose_tools/models/rtmw-x_8xb320-270e_cocktail14-384x288.py ADDED
@@ -0,0 +1,615 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # common setting
2
+ num_keypoints = 133
3
+ input_size = (288, 384)
4
+
5
+ # runtime
6
+ max_epochs = 270
7
+ stage2_num_epochs = 10
8
+ base_lr = 5e-4
9
+ train_batch_size = 320
10
+ val_batch_size = 32
11
+
12
+ train_cfg = dict(max_epochs=max_epochs, val_interval=10)
13
+ randomness = dict(seed=21)
14
+
15
+ # optimizer
16
+ optim_wrapper = dict(
17
+ type='OptimWrapper',
18
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.1),
19
+ clip_grad=dict(max_norm=35, norm_type=2),
20
+ paramwise_cfg=dict(
21
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
22
+
23
+ # learning rate
24
+ param_scheduler = [
25
+ dict(
26
+ type='LinearLR',
27
+ start_factor=1.0e-5,
28
+ by_epoch=False,
29
+ begin=0,
30
+ end=1000),
31
+ dict(
32
+ # use cosine lr over the second half of training (epoch 135 to 270)
33
+ type='CosineAnnealingLR',
34
+ eta_min=base_lr * 0.05,
35
+ begin=max_epochs // 2,
36
+ end=max_epochs,
37
+ T_max=max_epochs // 2,
38
+ by_epoch=True,
39
+ convert_to_iter_based=True),
40
+ ]
41
+
42
+ # automatically scale the LR based on the actual training batch size
43
+ auto_scale_lr = dict(base_batch_size=2560)
44
+
45
+ # codec settings
46
+ codec = dict(
47
+ type='SimCCLabel',
48
+ input_size=input_size,
49
+ sigma=(6., 6.93),
50
+ simcc_split_ratio=2.0,
51
+ normalize=False,
52
+ use_dark=False,
53
+ decode_visibility=True)
54
+
55
+ # model settings
56
+ model = dict(
57
+ type='TopdownPoseEstimator',
58
+ data_preprocessor=dict(
59
+ type='PoseDataPreprocessor',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ bgr_to_rgb=True),
63
+ backbone=dict(
64
+ type='CSPNeXt',
65
+ arch='P5',
66
+ expand_ratio=0.5,
67
+ deepen_factor=1.33,
68
+ widen_factor=1.25,
69
+ channel_attention=True,
70
+ norm_cfg=dict(type='BN'),
71
+ act_cfg=dict(type='SiLU'),
72
+ init_cfg=dict(
73
+ type='Pretrained',
74
+ prefix='backbone.',
75
+ checkpoint='https://download.openmmlab.com/mmpose/v1/'
76
+ 'wholebody_2d_keypoint/rtmpose/ubody/rtmpose-x_simcc-ucoco_pt-aic-coco_270e-384x288-f5b50679_20230822.pth' # noqa
77
+ )),
78
+ neck=dict(
79
+ type='CSPNeXtPAFPN',
80
+ in_channels=[320, 640, 1280],
81
+ out_channels=None,
82
+ out_indices=(
83
+ 1,
84
+ 2,
85
+ ),
86
+ num_csp_blocks=2,
87
+ expand_ratio=0.5,
88
+ norm_cfg=dict(type='SyncBN'),
89
+ act_cfg=dict(type='SiLU', inplace=True)),
90
+ head=dict(
91
+ type='RTMWHead',
92
+ in_channels=1280,
93
+ out_channels=num_keypoints,
94
+ input_size=input_size,
95
+ in_featuremap_size=tuple([s // 32 for s in input_size]),
96
+ simcc_split_ratio=codec['simcc_split_ratio'],
97
+ final_layer_kernel_size=7,
98
+ gau_cfg=dict(
99
+ hidden_dims=256,
100
+ s=128,
101
+ expansion_factor=2,
102
+ dropout_rate=0.,
103
+ drop_path=0.,
104
+ act_fn='SiLU',
105
+ use_rel_bias=False,
106
+ pos_enc=False),
107
+ loss=dict(
108
+ type='KLDiscretLoss',
109
+ use_target_weight=True,
110
+ beta=1.,
111
+ label_softmax=True,
112
+ label_beta=10.,
113
+ mask=list(range(23, 91)),
114
+ mask_weight=0.5,
115
+ ),
116
+ decoder=codec),
117
+ test_cfg=dict(flip_test=True))
118
+
119
+ # base dataset settings
120
+ dataset_type = 'CocoWholeBodyDataset'
121
+ data_mode = 'topdown'
122
+ data_root = 'data/'
123
+
124
+ backend_args = dict(backend='local')
125
+
126
+ # pipelines
127
+ train_pipeline = [
128
+ dict(type='LoadImage', backend_args=backend_args),
129
+ dict(type='GetBBoxCenterScale'),
130
+ dict(type='RandomFlip', direction='horizontal'),
131
+ dict(type='RandomHalfBody'),
132
+ dict(
133
+ type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=90),
134
+ dict(type='TopdownAffine', input_size=codec['input_size']),
135
+ dict(type='PhotometricDistortion'),
136
+ dict(
137
+ type='Albumentation',
138
+ transforms=[
139
+ dict(type='Blur', p=0.1),
140
+ dict(type='MedianBlur', p=0.1),
141
+ dict(
142
+ type='CoarseDropout',
143
+ max_holes=1,
144
+ max_height=0.4,
145
+ max_width=0.4,
146
+ min_holes=1,
147
+ min_height=0.2,
148
+ min_width=0.2,
149
+ p=0.5),
150
+ ]),
151
+ dict(
152
+ type='GenerateTarget',
153
+ encoder=codec,
154
+ use_dataset_keypoint_weights=True),
155
+ dict(type='PackPoseInputs')
156
+ ]
157
+ val_pipeline = [
158
+ dict(type='LoadImage', backend_args=backend_args),
159
+ dict(type='GetBBoxCenterScale'),
160
+ dict(type='TopdownAffine', input_size=codec['input_size']),
161
+ dict(type='PackPoseInputs')
162
+ ]
163
+ train_pipeline_stage2 = [
164
+ dict(type='LoadImage', backend_args=backend_args),
165
+ dict(type='GetBBoxCenterScale'),
166
+ dict(type='RandomFlip', direction='horizontal'),
167
+ dict(type='RandomHalfBody'),
168
+ dict(
169
+ type='RandomBBoxTransform',
170
+ shift_factor=0.,
171
+ scale_factor=[0.5, 1.5],
172
+ rotate_factor=90),
173
+ dict(type='TopdownAffine', input_size=codec['input_size']),
174
+ dict(
175
+ type='Albumentation',
176
+ transforms=[
177
+ dict(type='Blur', p=0.1),
178
+ dict(type='MedianBlur', p=0.1),
179
+ ]),
180
+ dict(
181
+ type='GenerateTarget',
182
+ encoder=codec,
183
+ use_dataset_keypoint_weights=True),
184
+ dict(type='PackPoseInputs')
185
+ ]
186
+
187
+ # mapping
188
+
189
+ aic_coco133 = [(0, 6), (1, 8), (2, 10), (3, 5), (4, 7), (5, 9), (6, 12),
190
+ (7, 14), (8, 16), (9, 11), (10, 13), (11, 15)]
191
+
192
+ crowdpose_coco133 = [(0, 5), (1, 6), (2, 7), (3, 8), (4, 9), (5, 10), (6, 11),
193
+ (7, 12), (8, 13), (9, 14), (10, 15), (11, 16)]
194
+
195
+ mpii_coco133 = [
196
+ (0, 16),
197
+ (1, 14),
198
+ (2, 12),
199
+ (3, 11),
200
+ (4, 13),
201
+ (5, 15),
202
+ (10, 10),
203
+ (11, 8),
204
+ (12, 6),
205
+ (13, 5),
206
+ (14, 7),
207
+ (15, 9),
208
+ ]
209
+
210
+ jhmdb_coco133 = [
211
+ (3, 6),
212
+ (4, 5),
213
+ (5, 12),
214
+ (6, 11),
215
+ (7, 8),
216
+ (8, 7),
217
+ (9, 14),
218
+ (10, 13),
219
+ (11, 10),
220
+ (12, 9),
221
+ (13, 16),
222
+ (14, 15),
223
+ ]
224
+
225
+ halpe_coco133 = [(i, i)
226
+ for i in range(17)] + [(20, 17), (21, 20), (22, 18), (23, 21),
227
+ (24, 19),
228
+ (25, 22)] + [(i, i - 3)
229
+ for i in range(26, 136)]
230
+
231
+ posetrack_coco133 = [
232
+ (0, 0),
233
+ (3, 3),
234
+ (4, 4),
235
+ (5, 5),
236
+ (6, 6),
237
+ (7, 7),
238
+ (8, 8),
239
+ (9, 9),
240
+ (10, 10),
241
+ (11, 11),
242
+ (12, 12),
243
+ (13, 13),
244
+ (14, 14),
245
+ (15, 15),
246
+ (16, 16),
247
+ ]
248
+
249
+ humanart_coco133 = [(i, i) for i in range(17)] + [(17, 99), (18, 120),
250
+ (19, 17), (20, 20)]
251
+
252
+ # train datasets
253
+ dataset_coco = dict(
254
+ type=dataset_type,
255
+ data_root=data_root,
256
+ data_mode=data_mode,
257
+ ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
258
+ data_prefix=dict(img='detection/coco/train2017/'),
259
+ pipeline=[],
260
+ )
261
+
262
+ dataset_aic = dict(
263
+ type='AicDataset',
264
+ data_root=data_root,
265
+ data_mode=data_mode,
266
+ ann_file='aic/annotations/aic_train.json',
267
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
268
+ '_train_20170902/keypoint_train_images_20170902/'),
269
+ pipeline=[
270
+ dict(
271
+ type='KeypointConverter',
272
+ num_keypoints=num_keypoints,
273
+ mapping=aic_coco133)
274
+ ],
275
+ )
276
+
277
+ dataset_crowdpose = dict(
278
+ type='CrowdPoseDataset',
279
+ data_root=data_root,
280
+ data_mode=data_mode,
281
+ ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
282
+ data_prefix=dict(img='pose/CrowdPose/images/'),
283
+ pipeline=[
284
+ dict(
285
+ type='KeypointConverter',
286
+ num_keypoints=num_keypoints,
287
+ mapping=crowdpose_coco133)
288
+ ],
289
+ )
290
+
291
+ dataset_mpii = dict(
292
+ type='MpiiDataset',
293
+ data_root=data_root,
294
+ data_mode=data_mode,
295
+ ann_file='mpii/annotations/mpii_train.json',
296
+ data_prefix=dict(img='pose/MPI/images/'),
297
+ pipeline=[
298
+ dict(
299
+ type='KeypointConverter',
300
+ num_keypoints=num_keypoints,
301
+ mapping=mpii_coco133)
302
+ ],
303
+ )
304
+
305
+ dataset_jhmdb = dict(
306
+ type='JhmdbDataset',
307
+ data_root=data_root,
308
+ data_mode=data_mode,
309
+ ann_file='jhmdb/annotations/Sub1_train.json',
310
+ data_prefix=dict(img='pose/JHMDB/'),
311
+ pipeline=[
312
+ dict(
313
+ type='KeypointConverter',
314
+ num_keypoints=num_keypoints,
315
+ mapping=jhmdb_coco133)
316
+ ],
317
+ )
318
+
319
+ dataset_halpe = dict(
320
+ type='HalpeDataset',
321
+ data_root=data_root,
322
+ data_mode=data_mode,
323
+ ann_file='halpe/annotations/halpe_train_v1.json',
324
+ data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'),
325
+ pipeline=[
326
+ dict(
327
+ type='KeypointConverter',
328
+ num_keypoints=num_keypoints,
329
+ mapping=halpe_coco133)
330
+ ],
331
+ )
332
+
333
+ dataset_posetrack = dict(
334
+ type='PoseTrack18Dataset',
335
+ data_root=data_root,
336
+ data_mode=data_mode,
337
+ ann_file='posetrack18/annotations/posetrack18_train.json',
338
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
339
+ pipeline=[
340
+ dict(
341
+ type='KeypointConverter',
342
+ num_keypoints=num_keypoints,
343
+ mapping=posetrack_coco133)
344
+ ],
345
+ )
346
+
347
+ dataset_humanart = dict(
348
+ type='HumanArt21Dataset',
349
+ data_root=data_root,
350
+ data_mode=data_mode,
351
+ ann_file='HumanArt/annotations/training_humanart.json',
352
+ filter_cfg=dict(scenes=['real_human']),
353
+ data_prefix=dict(img='pose/'),
354
+ pipeline=[
355
+ dict(
356
+ type='KeypointConverter',
357
+ num_keypoints=num_keypoints,
358
+ mapping=humanart_coco133)
359
+ ])
360
+
361
+ ubody_scenes = [
362
+ 'Magic_show', 'Entertainment', 'ConductMusic', 'Online_class', 'TalkShow',
363
+ 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow', 'Singing',
364
+ 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference'
365
+ ]
366
+
367
+ ubody_datasets = []
368
+ for scene in ubody_scenes:
369
+ each = dict(
370
+ type='UBody2dDataset',
371
+ data_root=data_root,
372
+ data_mode=data_mode,
373
+ ann_file=f'Ubody/annotations/{scene}/train_annotations.json',
374
+ data_prefix=dict(img='pose/UBody/images/'),
375
+ pipeline=[],
376
+ sample_interval=10)
377
+ ubody_datasets.append(each)
378
+
379
+ dataset_ubody = dict(
380
+ type='CombinedDataset',
381
+ metainfo=dict(from_file='configs/_base_/datasets/ubody2d.py'),
382
+ datasets=ubody_datasets,
383
+ pipeline=[],
384
+ test_mode=False,
385
+ )
386
+
387
+ face_pipeline = [
388
+ dict(type='LoadImage', backend_args=backend_args),
389
+ dict(type='GetBBoxCenterScale', padding=1.25),
390
+ dict(
391
+ type='RandomBBoxTransform',
392
+ shift_factor=0.,
393
+ scale_factor=[1.5, 2.0],
394
+ rotate_factor=0),
395
+ ]
396
+
397
+ wflw_coco133 = [(i * 2, 23 + i)
398
+ for i in range(17)] + [(33 + i, 40 + i) for i in range(5)] + [
399
+ (42 + i, 45 + i) for i in range(5)
400
+ ] + [(51 + i, 50 + i)
401
+ for i in range(9)] + [(60, 59), (61, 60), (63, 61),
402
+ (64, 62), (65, 63), (67, 64),
403
+ (68, 65), (69, 66), (71, 67),
404
+ (72, 68), (73, 69),
405
+ (75, 70)] + [(76 + i, 71 + i)
406
+ for i in range(20)]
407
+ dataset_wflw = dict(
408
+ type='WFLWDataset',
409
+ data_root=data_root,
410
+ data_mode=data_mode,
411
+ ann_file='wflw/annotations/face_landmarks_wflw_train.json',
412
+ data_prefix=dict(img='pose/WFLW/images/'),
413
+ pipeline=[
414
+ dict(
415
+ type='KeypointConverter',
416
+ num_keypoints=num_keypoints,
417
+ mapping=wflw_coco133), *face_pipeline
418
+ ],
419
+ )
420
+
421
+ mapping_300w_coco133 = [(i, 23 + i) for i in range(68)]
422
+ dataset_300w = dict(
423
+ type='Face300WDataset',
424
+ data_root=data_root,
425
+ data_mode=data_mode,
426
+ ann_file='300w/annotations/face_landmarks_300w_train.json',
427
+ data_prefix=dict(img='pose/300w/images/'),
428
+ pipeline=[
429
+ dict(
430
+ type='KeypointConverter',
431
+ num_keypoints=num_keypoints,
432
+ mapping=mapping_300w_coco133), *face_pipeline
433
+ ],
434
+ )
435
+
436
+ cofw_coco133 = [(0, 40), (2, 44), (4, 42), (1, 49), (3, 45), (6, 47), (8, 59),
437
+ (10, 62), (9, 68), (11, 65), (18, 54), (19, 58), (20, 53),
438
+ (21, 56), (22, 71), (23, 77), (24, 74), (25, 85), (26, 89),
439
+ (27, 80), (28, 31)]
440
+ dataset_cofw = dict(
441
+ type='COFWDataset',
442
+ data_root=data_root,
443
+ data_mode=data_mode,
444
+ ann_file='cofw/annotations/cofw_train.json',
445
+ data_prefix=dict(img='pose/COFW/images/'),
446
+ pipeline=[
447
+ dict(
448
+ type='KeypointConverter',
449
+ num_keypoints=num_keypoints,
450
+ mapping=cofw_coco133), *face_pipeline
451
+ ],
452
+ )
453
+
454
+ lapa_coco133 = [(i * 2, 23 + i) for i in range(17)] + [
455
+ (33 + i, 40 + i) for i in range(5)
456
+ ] + [(42 + i, 45 + i) for i in range(5)] + [
457
+ (51 + i, 50 + i) for i in range(4)
458
+ ] + [(58 + i, 54 + i) for i in range(5)] + [(66, 59), (67, 60), (69, 61),
459
+ (70, 62), (71, 63), (73, 64),
460
+ (75, 65), (76, 66), (78, 67),
461
+ (79, 68), (80, 69),
462
+ (82, 70)] + [(84 + i, 71 + i)
463
+ for i in range(20)]
464
+ dataset_lapa = dict(
465
+ type='LapaDataset',
466
+ data_root=data_root,
467
+ data_mode=data_mode,
468
+ ann_file='LaPa/annotations/lapa_trainval.json',
469
+ data_prefix=dict(img='pose/LaPa/'),
470
+ pipeline=[
471
+ dict(
472
+ type='KeypointConverter',
473
+ num_keypoints=num_keypoints,
474
+ mapping=lapa_coco133), *face_pipeline
475
+ ],
476
+ )
477
+
478
+ dataset_wb = dict(
479
+ type='CombinedDataset',
480
+ metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
481
+ datasets=[dataset_coco, dataset_halpe, dataset_ubody],
482
+ pipeline=[],
483
+ test_mode=False,
484
+ )
485
+
486
+ dataset_body = dict(
487
+ type='CombinedDataset',
488
+ metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
489
+ datasets=[
490
+ dataset_aic,
491
+ dataset_crowdpose,
492
+ dataset_mpii,
493
+ dataset_jhmdb,
494
+ dataset_posetrack,
495
+ dataset_humanart,
496
+ ],
497
+ pipeline=[],
498
+ test_mode=False,
499
+ )
500
+
501
+ dataset_face = dict(
502
+ type='CombinedDataset',
503
+ metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
504
+ datasets=[
505
+ dataset_wflw,
506
+ dataset_300w,
507
+ dataset_cofw,
508
+ dataset_lapa,
509
+ ],
510
+ pipeline=[],
511
+ test_mode=False,
512
+ )
513
+
514
+ hand_pipeline = [
515
+ dict(type='LoadImage', backend_args=backend_args),
516
+ dict(type='GetBBoxCenterScale'),
517
+ dict(
518
+ type='RandomBBoxTransform',
519
+ shift_factor=0.,
520
+ scale_factor=[1.5, 2.0],
521
+ rotate_factor=0),
522
+ ]
523
+
524
+ interhand_left = [(21, 95), (22, 94), (23, 93), (24, 92), (25, 99), (26, 98),
525
+ (27, 97), (28, 96), (29, 103), (30, 102), (31, 101),
526
+ (32, 100), (33, 107), (34, 106), (35, 105), (36, 104),
527
+ (37, 111), (38, 110), (39, 109), (40, 108), (41, 91)]
528
+ interhand_right = [(i - 21, j + 21) for i, j in interhand_left]
529
+ interhand_coco133 = interhand_right + interhand_left
530
+
531
+ dataset_interhand2d = dict(
532
+ type='InterHand2DDoubleDataset',
533
+ data_root=data_root,
534
+ data_mode=data_mode,
535
+ ann_file='interhand26m/annotations/all/InterHand2.6M_train_data.json',
536
+ camera_param_file='interhand26m/annotations/all/'
537
+ 'InterHand2.6M_train_camera.json',
538
+ joint_file='interhand26m/annotations/all/'
539
+ 'InterHand2.6M_train_joint_3d.json',
540
+ data_prefix=dict(img='interhand2.6m/images/train/'),
541
+ sample_interval=10,
542
+ pipeline=[
543
+ dict(
544
+ type='KeypointConverter',
545
+ num_keypoints=num_keypoints,
546
+ mapping=interhand_coco133,
547
+ ), *hand_pipeline
548
+ ],
549
+ )
550
+
551
+ dataset_hand = dict(
552
+ type='CombinedDataset',
553
+ metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
554
+ datasets=[dataset_interhand2d],
555
+ pipeline=[],
556
+ test_mode=False,
557
+ )
558
+
559
+ train_datasets = [dataset_wb, dataset_body, dataset_face, dataset_hand]
560
+
561
+ # data loaders
562
+ train_dataloader = dict(
563
+ batch_size=train_batch_size,
564
+ num_workers=4,
565
+ pin_memory=False,
566
+ persistent_workers=True,
567
+ sampler=dict(type='DefaultSampler', shuffle=True),
568
+ dataset=dict(
569
+ type='CombinedDataset',
570
+ metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
571
+ datasets=train_datasets,
572
+ pipeline=train_pipeline,
573
+ test_mode=False,
574
+ ))
575
+
576
+ val_dataloader = dict(
577
+ batch_size=val_batch_size,
578
+ num_workers=4,
579
+ persistent_workers=True,
580
+ drop_last=False,
581
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
582
+ dataset=dict(
583
+ type='CocoWholeBodyDataset',
584
+ ann_file='data/coco/annotations/coco_wholebody_val_v1.0.json',
585
+ data_prefix=dict(img='data/detection/coco/val2017/'),
586
+ pipeline=val_pipeline,
587
+ bbox_file='data/coco/person_detection_results/'
588
+ 'COCO_val2017_detections_AP_H_56_person.json',
589
+ test_mode=True))
590
+
591
+ test_dataloader = val_dataloader
592
+
593
+ # hooks
594
+ default_hooks = dict(
595
+ checkpoint=dict(
596
+ save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
597
+
598
+ custom_hooks = [
599
+ dict(
600
+ type='EMAHook',
601
+ ema_type='ExpMomentumEMA',
602
+ momentum=0.0002,
603
+ update_buffers=True,
604
+ priority=49),
605
+ dict(
606
+ type='mmdet.PipelineSwitchHook',
607
+ switch_epoch=max_epochs - stage2_num_epochs,
608
+ switch_pipeline=train_pipeline_stage2)
609
+ ]
610
+
611
+ # evaluators
612
+ val_evaluator = dict(
613
+ type='CocoWholeBodyMetric',
614
+ ann_file='data/coco/annotations/coco_wholebody_val_v1.0.json')
615
+ test_evaluator = val_evaluator
dwpose_tools/models/rtmw-x_simcc-cocktail14_pt-ucoco_270e-384x288-f840f204_20231122.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f840f2044fe46cb3821b7cea86be83e1f6cba406ccd28f5475ac010412dcda95
3
+ size 369720404
dwpose_tools/models/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3bd2b23e4cd178bfcc756df67e0d0949f3d77e0a73482f6da694c580ed54da1
3
+ size 217289556
dwpose_tools/models/yolox_l_8xb8-300e_coco.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ img_scale = (640, 640) # width, height
2
+
3
+ # model settings
4
+ model = dict(
5
+ type='YOLOX',
6
+ data_preprocessor=dict(
7
+ type='DetDataPreprocessor',
8
+ pad_size_divisor=32,
9
+ batch_augments=[
10
+ dict(
11
+ type='BatchSyncRandomResize',
12
+ random_size_range=(480, 800),
13
+ size_divisor=32,
14
+ interval=10)
15
+ ]),
16
+ backbone=dict(
17
+ type='CSPDarknet',
18
+ deepen_factor=1.0,
19
+ widen_factor=1.0,
20
+ out_indices=(2, 3, 4),
21
+ use_depthwise=False,
22
+ spp_kernal_sizes=(5, 9, 13),
23
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
24
+ act_cfg=dict(type='Swish'),
25
+ ),
26
+ neck=dict(
27
+ type='YOLOXPAFPN',
28
+ in_channels=[256, 512, 1024],
29
+ out_channels=256,
30
+ num_csp_blocks=3,
31
+ use_depthwise=False,
32
+ upsample_cfg=dict(scale_factor=2, mode='nearest'),
33
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
34
+ act_cfg=dict(type='Swish')),
35
+ bbox_head=dict(
36
+ type='YOLOXHead',
37
+ num_classes=80,
38
+ in_channels=256,
39
+ feat_channels=256,
40
+ stacked_convs=2,
41
+ strides=(8, 16, 32),
42
+ use_depthwise=False,
43
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
44
+ act_cfg=dict(type='Swish'),
45
+ loss_cls=dict(
46
+ type='CrossEntropyLoss',
47
+ use_sigmoid=True,
48
+ reduction='sum',
49
+ loss_weight=1.0),
50
+ loss_bbox=dict(
51
+ type='IoULoss',
52
+ mode='square',
53
+ eps=1e-16,
54
+ reduction='sum',
55
+ loss_weight=5.0),
56
+ loss_obj=dict(
57
+ type='CrossEntropyLoss',
58
+ use_sigmoid=True,
59
+ reduction='sum',
60
+ loss_weight=1.0),
61
+ loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)),
62
+ train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
63
+ # To align with the source code, the score threshold of the val phase is
64
+ # 0.01, and the score threshold of the test phase is 0.001.
65
+ test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
66
+
67
+ # dataset settings
68
+ data_root = 'data/coco/'
69
+ dataset_type = 'CocoDataset'
70
+
71
+ # Example to use different file client
72
+ # Method 1: simply set the data root and let the file I/O module
73
+ # automatically infer from prefix (not support LMDB and Memcache yet)
74
+
75
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
76
+
77
+ # Method 2: Use `backend_args` (named `file_client_args` in versions before 3.0.0rc6)
78
+ # backend_args = dict(
79
+ # backend='petrel',
80
+ # path_mapping=dict({
81
+ # './data/': 's3://openmmlab/datasets/detection/',
82
+ # 'data/': 's3://openmmlab/datasets/detection/'
83
+ # }))
84
+ backend_args = None
85
+
86
+ train_pipeline = [
87
+ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
88
+ dict(
89
+ type='RandomAffine',
90
+ scaling_ratio_range=(0.1, 2),
91
+ # img_scale is (width, height)
92
+ border=(-img_scale[0] // 2, -img_scale[1] // 2)),
93
+ dict(
94
+ type='MixUp',
95
+ img_scale=img_scale,
96
+ ratio_range=(0.8, 1.6),
97
+ pad_val=114.0),
98
+ dict(type='YOLOXHSVRandomAug'),
99
+ dict(type='RandomFlip', prob=0.5),
100
+ # According to the official implementation, multi-scale
101
+ # training is not considered here but in the
102
+ # 'mmdet/models/detectors/yolox.py'.
103
+ # Resize and Pad are for the last 15 epochs, when Mosaic,
104
+ # RandomAffine, and MixUp are disabled by YOLOXModeSwitchHook.
105
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
106
+ dict(
107
+ type='Pad',
108
+ pad_to_square=True,
109
+ # If the image is three-channel, the pad value needs
110
+ # to be set separately for each channel.
111
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
112
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
113
+ dict(type='PackDetInputs')
114
+ ]
115
+
116
+ train_dataset = dict(
117
+ # use MultiImageMixDataset wrapper to support mosaic and mixup
118
+ type='MultiImageMixDataset',
119
+ dataset=dict(
120
+ type=dataset_type,
121
+ data_root=data_root,
122
+ ann_file='annotations/instances_train2017.json',
123
+ data_prefix=dict(img='train2017/'),
124
+ pipeline=[
125
+ dict(type='LoadImageFromFile', backend_args=backend_args),
126
+ dict(type='LoadAnnotations', with_bbox=True)
127
+ ],
128
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
129
+ backend_args=backend_args),
130
+ pipeline=train_pipeline)
131
+
132
+ test_pipeline = [
133
+ dict(type='LoadImageFromFile', backend_args=backend_args),
134
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
135
+ dict(
136
+ type='Pad',
137
+ pad_to_square=True,
138
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
139
+ dict(type='LoadAnnotations', with_bbox=True),
140
+ dict(
141
+ type='PackDetInputs',
142
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
143
+ 'scale_factor'))
144
+ ]
145
+
146
+ train_dataloader = dict(
147
+ batch_size=8,
148
+ num_workers=4,
149
+ persistent_workers=True,
150
+ sampler=dict(type='DefaultSampler', shuffle=True),
151
+ dataset=train_dataset)
152
+ val_dataloader = dict(
153
+ batch_size=8,
154
+ num_workers=4,
155
+ persistent_workers=True,
156
+ drop_last=False,
157
+ sampler=dict(type='DefaultSampler', shuffle=False),
158
+ dataset=dict(
159
+ type=dataset_type,
160
+ data_root=data_root,
161
+ ann_file='annotations/instances_val2017.json',
162
+ data_prefix=dict(img='val2017/'),
163
+ test_mode=True,
164
+ pipeline=test_pipeline,
165
+ backend_args=backend_args))
166
+ test_dataloader = val_dataloader
167
+
168
+ val_evaluator = dict(
169
+ type='CocoMetric',
170
+ ann_file=data_root + 'annotations/instances_val2017.json',
171
+ metric='bbox',
172
+ backend_args=backend_args)
173
+ test_evaluator = val_evaluator
174
+
175
+ # training settings
176
+ max_epochs = 300
177
+ num_last_epochs = 15
178
+ interval = 10
179
+
180
+ train_cfg = dict(max_epochs=max_epochs, val_interval=interval)
181
+
182
+ # optimizer
183
+ # default 8 gpu
184
+ base_lr = 0.01
185
+ optim_wrapper = dict(
186
+ type='OptimWrapper',
187
+ optimizer=dict(
188
+ type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
189
+ nesterov=True),
190
+ paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
191
+
192
+ # learning rate
193
+ param_scheduler = [
194
+ dict(
195
+ # use a quadratic formula to warm up for 5 epochs;
196
+ # the lr is updated every iteration
197
+ # TODO: fix default scope in get function
198
+ type='mmdet.QuadraticWarmupLR',
199
+ by_epoch=True,
200
+ begin=0,
201
+ end=5,
202
+ convert_to_iter_based=True),
203
+ dict(
204
+ # use cosine lr from 5 to 285 epoch
205
+ type='CosineAnnealingLR',
206
+ eta_min=base_lr * 0.05,
207
+ begin=5,
208
+ T_max=max_epochs - num_last_epochs,
209
+ end=max_epochs - num_last_epochs,
210
+ by_epoch=True,
211
+ convert_to_iter_based=True),
212
+ dict(
213
+ # use fixed lr during last 15 epochs
214
+ type='ConstantLR',
215
+ by_epoch=True,
216
+ factor=1,
217
+ begin=max_epochs - num_last_epochs,
218
+ end=max_epochs,
219
+ )
220
+ ]
221
+
222
+ default_hooks = dict(
223
+ checkpoint=dict(
224
+ interval=interval,
225
+ max_keep_ckpts=3 # only keep latest 3 checkpoints
226
+ ))
227
+
228
+ custom_hooks = [
229
+ dict(
230
+ type='YOLOXModeSwitchHook',
231
+ num_last_epochs=num_last_epochs,
232
+ priority=48),
233
+ dict(type='SyncNormHook', priority=48),
234
+ dict(
235
+ type='EMAHook',
236
+ ema_type='ExpMomentumEMA',
237
+ momentum=0.0001,
238
+ update_buffers=True,
239
+ priority=49)
240
+ ]
241
+
242
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
243
+ # USER SHOULD NOT CHANGE ITS VALUES.
244
+ # base_batch_size = (8 GPUs) x (8 samples per GPU)
245
+ auto_scale_lr = dict(base_batch_size=64)