---
# Training-schedule and optimizer settings.
# NOTE(review): this block layout was reconstructed from a copy in which all
# line breaks had been lost. Confirm the nesting under `optimizer` (in
# particular that `lr`, `betas` and the last `weight_decay` are optimizer-level
# keys rather than part of the second parameter group) against the consumer.

# Feature toggles: automatic mixed precision and exponential moving average.
use_amp: true
use_ema: true

# Settings for the EMA object named by `type`.
ema:
  type: ModelEMA
  decay: 0.9999
  warmups: 2000
  start: 0

# Key spelling (`epoches`) is kept verbatim; presumably the consumer reads this
# exact key, so do not rename it without checking.
epoches: 72
clip_max_norm: 0.1

train_dataloader:
  total_batch_size: 16

optimizer:
  type: AdamW
  # Per-group overrides. Each `params` value is a regex, presumably matched
  # against parameter names by the consumer (TODO confirm).
  params:
    # Names that contain "backbone" and do not contain "norm".
    - params: '^(?=.*backbone)(?!.*norm).*$'
      lr: 0.00001
    # Names that contain "encoder" or "decoder" and also "norm" or "bn".
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0

  # Optimizer-level values; presumably the defaults for parameters that are
  # not overridden by a group above (TODO confirm).
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

lr_scheduler:
  type: MultiStepLR
  # NOTE(review): the only milestone (1000) is larger than `epoches` (72). If
  # the scheduler is stepped once per epoch the decay by `gamma` never
  # happens; confirm whether that is intended.
  milestones: [1000]
  gamma: 0.1

lr_warmup_scheduler:
  type: LinearWarmup
  warmup_duration: 2000