Yuanzhi committed on
Commit
4d85afd
·
verified ·
1 Parent(s): a0296cf

Upload 6 files

Browse files
maskbit/DDMD-GPUS1-bs128-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-emb_pert_fix_0.1-tcfg1.5-fcfg1.0-fcfgt1.0-r_mode_arccos-r_mode_f_arccos-reduce_sum-seed3407-bf16_checkpoint-50000/config.yaml ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: DDMD-GPUS1-bs128-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-emb_pert_fix_0.1-tcfg1.5-fcfg1.0-fcfgt1.0-r_mode_arccos-r_mode_f_arccos-reduce_sum-seed3407-bf16
2
+ report_to: wandb
3
+ output_dir: outputs//MaskBit_benchmark/DDMD-GPUS1-bs128-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-emb_pert_fix_0.1-tcfg1.5-fcfg1.0-fcfgt1.0-r_mode_arccos-r_mode_f_arccos-reduce_sum-seed3407-bf16/2025-08-11T16-13
4
+ pretrained_model_path: ./pretrained_maskgit/MaskGIT/MaskGIT_ImageNet_256.pth
5
+ pretrained_vae_path: ./pretrained_maskgit/VQGAN/
6
+ wandb_project: DiMO_GAN_MaskBit
7
+ wandb_user: yuanzhi_zhu
8
+ ema_cpu: false
9
+ ema_freq: 1
10
+ max_train_steps: 200001
11
+ log_steps: 50
12
+ validation_steps: 200
13
+ validation_steps_tuple:
14
+ - 2
15
+ - 50
16
+ - 200
17
+ - 500
18
+ metric_type: fid
19
+ metric_steps: 2000
20
+ metric_steps_tuple:
21
+ - 2000
22
+ - 5000
23
+ scale_lr: false
24
+ lr_warmup_steps: 500
25
+ lr_scheduler_type: constant_with_warmup
26
+ train_batch_size: 128
27
+ loss_reduction: sum
28
+ optimizer_type: adam
29
+ adam_beta1: 0.9
30
+ adam_beta2: 0.999
31
+ fake_adam_beta1: 0.9
32
+ fake_adam_beta2: 0.999
33
+ adam_weight_decay: 0.0
34
+ adam_epsilon: 1.0e-06
35
+ max_grad_norm: 1.0
36
+ gradient_accumulation_steps: 1
37
+ gradient_checkpointing: false
38
+ checkpointing_steps: 1000
39
+ mixed_precision: bf16
40
+ enable_xformers_memory_efficient_attention: false
41
+ global_seed: 3407
42
+ is_debug: false
43
+ cast_models_to_dtype: false
44
+ checkpoints_total_limit: 5
45
+ checkpointing_steps_tuple:
46
+ - 5000
47
+ - 10000
48
+ - 15000
49
+ - 20000
50
+ - 25000
51
+ - 30000
52
+ - 40000
53
+ - 50000
54
+ - 60000
55
+ - 70000
56
+ - 80000
57
+ - 90000
58
+ - 100000
59
+ resume_from_checkpoint_path: ''
60
+ no_progress_bar: true
61
+ generator_lr: 1.0e-06
62
+ fake_rounds: 1
63
+ weight_factor_type: uniform
64
+ dm_loss_weight: 1.0
65
+ distil_loss_type: FKL
66
+ fixed_ratio: fix_0.6
67
+ noise_emb_perturb: fix_0.0
68
+ top_k: 0
69
+ top_p: 0.0
70
+ true_cfg: 1.5
71
+ fake_cfg_eval: 1.0
72
+ fake_cfg_train: 1.0
73
+ fake_lr: 1.0e-06
74
+ fake_cfg_drop_ratio: 0.0
75
+ gen_temp: 1.0
76
+ true_temp: 1.0
77
+ fake_temp: 1.0
78
+ ignore_index: -1
79
+ ratio_mode: arccos
80
+ ratio_mode_fake: arccos
81
+ temperature_fake: 1.0
82
+ alpha_fake: 0.0
83
+ fix_emb_layer: true
84
+ adaptive_cfg: false
85
+ use_discriminator: 0.0
86
+ discriminator_lr: 1.0e-06
87
+ g_loss_weight: 0.1
88
+ d_loss_weight: 1.0
89
+ gan_noise_level: continuous_0.-0.5
90
+ noisy_as_masked_emb: true
91
+ noisy_as_noisy_feat: false
92
+ same_maskemb: true
93
+ generator_loss_type: non_saturating
94
+ use_discriminator_steps: 1000
95
+ emb_mode: plain
96
+ tau: 0.5
97
+ tau_min: 0.1
98
+ dis_min_ch: 256
99
+ dis_f_layer: 3
100
+ dis_emb_input: true
101
+ dis_t_cond: true
102
+ dis_adam_beta1: 0.0
103
+ dis_adam_beta2: 0.999
104
+ sched_mode: arccos
105
+ sampling_step: 64
106
+ mask_value: 1024
107
+ cfg_w: 5.8
108
+ r_temp: 10.3
109
+ sm_temp: 1.0
110
+ Jeffreys_beta: 0.0
111
+ run_id: 2025-08-11T16-13
112
+ precision: bf16
113
+ folder_name: DDMD-GPUS1-bs128-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-emb_pert_fix_0.1-tcfg1.5-fcfg1.0-fcfgt1.0-r_mode_arccos-r_mode_f_arccos-reduce_sum-seed3407-bf16/2025-08-11T16-13
114
+ output_name: /MaskBit_benchmark
115
+ ema_decay: 0.9999
116
+ resolution: 256
maskbit/DDMD-GPUS1-bs128-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-emb_pert_fix_0.1-tcfg1.5-fcfg1.0-fcfgt1.0-r_mode_arccos-r_mode_f_arccos-reduce_sum-seed3407-bf16_checkpoint-50000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25ab49d06acf857f69522e6cfa8b100d1d3af180ad60af07073bb23de04a50a1
3
+ size 1219832638
maskbit/DiMO2-GPUS2-bs64-glr5e-07-flr5e-07-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-non_saturating-dlr5e-07-g_w200.0-d_w1.0-dm_w1.0-g_noise_0.-0.95-plain-bf16_checkpoint-50000/config.yaml ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: DiMO2-GPUS2-bs64-glr5e-07-flr5e-07-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-non_saturating-dlr5e-07-g_w200.0-d_w1.0-dm_w1.0-g_noise_0.-0.95-plain-bf16
2
+ report_to: wandb
3
+ output_dir: outputs//MaskBit_benchmark/DiMO2-GPUS2-bs64-glr5e-07-flr5e-07-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-non_saturating-dlr5e-07-g_w200.0-d_w1.0-dm_w1.0-g_noise_0.-0.95-plain-bf16/2025-08-25T18-34
4
+ pretrained_model_path: ./pretrained_maskgit/MaskGIT/MaskGIT_ImageNet_256.pth
5
+ pretrained_vae_path: ./pretrained_maskgit/VQGAN/
6
+ load_dimo_pre_train: outputs/MaskBit_benchmark/DiMO2-GPUS4-bs64-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-r_modes_arccos-arccos-non_saturating-dlr1e-06-g_w25.0-d_w1.0-dm_w1.0-g_noise_continuous_0.-0.95-plain-bf16/2025-08-17T14-45/meta_checkpoints_best_fid/ema_model/pytorch_model.bin
7
+ wandb_project: DiMO_GAN_MaskBit
8
+ wandb_user: yuanzhi_zhu
9
+ ema_cpu: false
10
+ ema_freq: 1
11
+ ema_rampup: true
12
+ max_train_steps: 200001
13
+ log_steps: 50
14
+ validation_steps: 200
15
+ validation_steps_tuple:
16
+ - 2
17
+ - 50
18
+ - 200
19
+ - 500
20
+ metric_type: fid
21
+ metric_steps: 2000
22
+ metric_steps_tuple:
23
+ - 2000
24
+ - 5000
25
+ scale_lr: false
26
+ lr_warmup_steps: 500
27
+ lr_scheduler_type: constant_with_warmup
28
+ train_batch_size: 64
29
+ loss_reduction: sum
30
+ optimizer_type: adamw
31
+ adam_beta1: 0.9
32
+ adam_beta2: 0.999
33
+ fake_adam_beta1: 0.9
34
+ fake_adam_beta2: 0.999
35
+ adam_weight_decay: 0.01
36
+ adam_epsilon: 1.0e-06
37
+ max_grad_norm: 1.0
38
+ gradient_accumulation_steps: 1
39
+ gradient_checkpointing: false
40
+ checkpointing_steps: 1000
41
+ mixed_precision: bf16
42
+ enable_xformers_memory_efficient_attention: false
43
+ global_seed: 3407
44
+ is_debug: false
45
+ cast_models_to_dtype: false
46
+ checkpoints_total_limit: 5
47
+ checkpointing_steps_tuple:
48
+ - 5000
49
+ - 10000
50
+ - 15000
51
+ - 20000
52
+ - 25000
53
+ - 30000
54
+ - 40000
55
+ - 50000
56
+ - 60000
57
+ - 70000
58
+ - 80000
59
+ - 90000
60
+ - 100000
61
+ resume_from_checkpoint_path: ''
62
+ no_progress_bar: true
63
+ generator_lr: 5.0e-07
64
+ fake_rounds: 1
65
+ weight_factor_type: uniform
66
+ dm_loss_weight: 1.0
67
+ distil_loss_type: FKL
68
+ fixed_ratio: fix_0.6
69
+ noise_emb_perturb: fix_0.0
70
+ top_k: 0
71
+ top_p: 0.0
72
+ true_cfg: 1.5
73
+ fake_cfg_eval: 1.0
74
+ fake_cfg_train: 1.0
75
+ fake_lr: 5.0e-07
76
+ fake_cfg_drop_ratio: 0.0
77
+ gen_temp: 1.0
78
+ true_temp: 1.0
79
+ fake_temp: 1.0
80
+ ignore_index: -1
81
+ ratio_mode: arccos
82
+ ratio_mode_fake: arccos
83
+ temperature_fake: 1.0
84
+ alpha_fake: 0.0
85
+ fix_emb_layer: true
86
+ cfg_schedule: constant
87
+ emb_input: false
88
+ use_discriminator: 1.0
89
+ discriminator_lr: 5.0e-07
90
+ g_loss_weight: 200.0
91
+ d_loss_weight: 1.0
92
+ gan_noise_level: continuous_0.-0.95
93
+ noisy_as_masked_emb: true
94
+ noisy_as_noisy_feat: false
95
+ noisy_as_noisy_emb: false
96
+ same_maskemb: true
97
+ generator_loss_type: non_saturating
98
+ use_discriminator_steps: 1000
99
+ emb_mode: plain
100
+ tau: 0.5
101
+ tau_min: 0.1
102
+ dis_min_ch: 256
103
+ dis_f_layer: 4
104
+ dis_emb_input: true
105
+ dis_t_cond: true
106
+ dis_adam_beta1: 0.0
107
+ dis_adam_beta2: 0.999
108
+ sched_mode: arccos
109
+ sampling_step: 64
110
+ mask_value: 1024
111
+ cfg_w: 5.8
112
+ r_temp: 10.3
113
+ sm_temp: 1.0
114
+ Jeffreys_beta: 0.0
115
+ run_id: 2025-08-25T18-34
116
+ precision: bf16
117
+ folder_name: DiMO2-GPUS2-bs64-glr5e-07-flr5e-07-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-non_saturating-dlr5e-07-g_w200.0-d_w1.0-dm_w1.0-g_noise_0.-0.95-plain-bf16/2025-08-25T18-34
118
+ output_name: /MaskBit_benchmark
119
+ ema_decay: 0.9999
120
+ resolution: 256
maskbit/DiMO2-GPUS2-bs64-glr5e-07-flr5e-07-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-non_saturating-dlr5e-07-g_w200.0-d_w1.0-dm_w1.0-g_noise_0.-0.95-plain-bf16_checkpoint-50000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1423dfbe97093c93a23b963e599f34b202bdaef9bfb101e0955f35d1bb0d0a7
3
+ size 1219832638
maskbit/DiMO2-GPUS4-bs64-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-r_modes_arccos-arccos-non_saturating-dlr1e-06-g_w25.0-d_w1.0-dm_w1.0-g_noise_continuous_0.-0.95-plain-bf16_checkpoint-50000/config.yaml ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: DiMO2-GPUS4-bs64-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-r_modes_arccos-arccos-non_saturating-dlr1e-06-g_w25.0-d_w1.0-dm_w1.0-g_noise_continuous_0.-0.95-plain-bf16
2
+ report_to: wandb
3
+ output_dir: outputs//MaskBit_benchmark/DiMO2-GPUS4-bs64-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-r_modes_arccos-arccos-non_saturating-dlr1e-06-g_w25.0-d_w1.0-dm_w1.0-g_noise_continuous_0.-0.95-plain-bf16/2025-08-17T14-45
4
+ pretrained_model_path: ./pretrained_maskgit/MaskGIT/MaskGIT_ImageNet_256.pth
5
+ pretrained_vae_path: ./pretrained_maskgit/VQGAN/
6
+ load_dimo_pre_train: ./outputs/MaskBit_benchmark/DDMD-GPUS1-bs128-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-emb_pert_fix_0.1-tcfg1.5-fcfg1.0-fcfgt1.0-r_mode_arccos-r_mode_f_arccos-reduce_sum-seed3407-bf16/2025-08-11T16-13/_checkpoint-50000/ema_model/pytorch_model.bin
7
+ wandb_project: DiMO_GAN_MaskBit
8
+ wandb_user: yuanzhi_zhu
9
+ ema_cpu: false
10
+ ema_freq: 1
11
+ ema_rampup: true
12
+ max_train_steps: 200001
13
+ log_steps: 50
14
+ validation_steps: 200
15
+ validation_steps_tuple:
16
+ - 2
17
+ - 50
18
+ - 200
19
+ - 500
20
+ metric_type: fid
21
+ metric_steps: 2000
22
+ metric_steps_tuple:
23
+ - 2000
24
+ - 5000
25
+ scale_lr: false
26
+ lr_warmup_steps: 500
27
+ lr_scheduler_type: constant_with_warmup
28
+ train_batch_size: 64
29
+ loss_reduction: sum
30
+ optimizer_type: adam
31
+ adam_beta1: 0.9
32
+ adam_beta2: 0.999
33
+ fake_adam_beta1: 0.9
34
+ fake_adam_beta2: 0.999
35
+ adam_weight_decay: 0.0
36
+ adam_epsilon: 1.0e-06
37
+ max_grad_norm: 1.0
38
+ gradient_accumulation_steps: 1
39
+ gradient_checkpointing: false
40
+ checkpointing_steps: 1000
41
+ mixed_precision: bf16
42
+ enable_xformers_memory_efficient_attention: false
43
+ global_seed: 3407
44
+ is_debug: false
45
+ cast_models_to_dtype: false
46
+ checkpoints_total_limit: 5
47
+ checkpointing_steps_tuple:
48
+ - 5000
49
+ - 10000
50
+ - 15000
51
+ - 20000
52
+ - 25000
53
+ - 30000
54
+ - 40000
55
+ - 50000
56
+ - 60000
57
+ - 70000
58
+ - 80000
59
+ - 90000
60
+ - 100000
61
+ resume_from_checkpoint_path: ''
62
+ no_progress_bar: true
63
+ generator_lr: 1.0e-06
64
+ fake_rounds: 1
65
+ weight_factor_type: uniform
66
+ dm_loss_weight: 1.0
67
+ distil_loss_type: FKL
68
+ fixed_ratio: fix_0.6
69
+ noise_emb_perturb: fix_0.0
70
+ top_k: 0
71
+ top_p: 0.0
72
+ true_cfg: 1.5
73
+ fake_cfg_eval: 1.0
74
+ fake_cfg_train: 1.0
75
+ fake_lr: 1.0e-06
76
+ fake_cfg_drop_ratio: 0.0
77
+ gen_temp: 1.0
78
+ true_temp: 1.0
79
+ fake_temp: 1.0
80
+ ignore_index: -1
81
+ ratio_mode: arccos
82
+ ratio_mode_fake: arccos
83
+ temperature_fake: 1.0
84
+ alpha_fake: 0.0
85
+ fix_emb_layer: true
86
+ cfg_schedule: constant
87
+ emb_input: false
88
+ use_discriminator: 1.0
89
+ discriminator_lr: 1.0e-06
90
+ g_loss_weight: 25.0
91
+ d_loss_weight: 1.0
92
+ gan_noise_level: continuous_0.-0.95
93
+ noisy_as_masked_emb: true
94
+ noisy_as_noisy_feat: false
95
+ noisy_as_noisy_emb: false
96
+ same_maskemb: true
97
+ generator_loss_type: non_saturating
98
+ use_discriminator_steps: 1000
99
+ emb_mode: plain
100
+ tau: 0.5
101
+ tau_min: 0.1
102
+ dis_min_ch: 256
103
+ dis_f_layer: 4
104
+ dis_emb_input: true
105
+ dis_t_cond: true
106
+ dis_adam_beta1: 0.0
107
+ dis_adam_beta2: 0.999
108
+ sched_mode: arccos
109
+ sampling_step: 64
110
+ mask_value: 1024
111
+ cfg_w: 5.8
112
+ r_temp: 10.3
113
+ sm_temp: 1.0
114
+ Jeffreys_beta: 0.0
115
+ run_id: 2025-08-17T14-45
116
+ precision: bf16
117
+ folder_name: DiMO2-GPUS4-bs64-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-r_modes_arccos-arccos-non_saturating-dlr1e-06-g_w25.0-d_w1.0-dm_w1.0-g_noise_continuous_0.-0.95-plain-bf16/2025-08-17T14-45
118
+ output_name: /MaskBit_benchmark
119
+ ema_decay: 0.9999
120
+ resolution: 256
maskbit/DiMO2-GPUS4-bs64-glr1e-06-flr1e-06-FKL-fix_rfix_0.6-cfgs1.5-1.0-1.0-r_modes_arccos-arccos-non_saturating-dlr1e-06-g_w25.0-d_w1.0-dm_w1.0-g_noise_continuous_0.-0.95-plain-bf16_checkpoint-50000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32351716414757c5fdc917de7c96889f253a9a4d645a33e1b2a5168af5f170f0
3
+ size 1219832638