| dataset: | |
| ddpm: false | |
| ddpm_ratio: 0 | |
| input_size: 224 | |
| name: imagenet | |
| num_classes: 1000 | |
| loss: | |
| eps: 0.1411764705882353 | |
| gamma: 5.0 | |
| loss_type: logit_annealing_loss | |
| max_eps_ratio: 2.0 | |
| min_eps_ratio: 0.1 | |
| offset: 2.0 | |
| temperature: 0.75 | |
| model: | |
| act_name: MinMax | |
| backbone_centering: true | |
| backbone_type: hybridbrov2 | |
| backbone_weight_rank_ratio: 0.5 | |
| dense_type: cholesky | |
| dense_width: 2048 | |
| depth: 14 | |
| depth_1: 6 | |
| depth_2: 8 | |
| linear_num: 8 | |
| neck_conv_patch_size: 8 | |
| neck_conv_patch_size_2: 0 | |
| neck_conv_type: l2 | |
| neck_linear_type: cholesky | |
| num_lc_iter: 10 | |
| stem_kernel_size: 5 | |
| use_lln: true | |
| width: 588 | |
| training: | |
| batch_size: 1024 | |
| epochs: 400 | |
| grad_clip: true | |
| grad_clip_val: 3 | |
| lion: false | |
| lookahead: true | |
| lr: 0.001 | |
| momentum: 0.0 | |
| nadam: true | |
| sgd: false | |
| warmup_epochs: 20 | |
| weight_decay: 0.0 | |