dxcanh committed on
Commit
4f0d5c2
·
verified ·
1 Parent(s): d914b5c

Upload 5 files

Browse files
options/CodeFormer_colorization.yml CHANGED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # general settings
2
+ name: CodeFormer_colorization
3
+ model_type: CodeFormerIdxModel
4
+ num_gpu: 8
5
+ manual_seed: 0
6
+
7
+ # dataset and data loader settings
8
+ datasets:
9
+ train:
10
+ name: FFHQ
11
+ type: FFHQBlindDataset
12
+ dataroot_gt: datasets/ffhq/ffhq_512
13
+ filename_tmpl: '{}'
14
+ io_backend:
15
+ type: disk
16
+
17
+ in_size: 512
18
+ gt_size: 512
19
+ mean: [0.5, 0.5, 0.5]
20
+ std: [0.5, 0.5, 0.5]
21
+ use_hflip: true
22
+ use_corrupt: true
23
+
24
+ # large degradation in stageII
25
+ blur_kernel_size: 41
26
+ use_motion_kernel: false
27
+ motion_kernel_prob: 0.001
28
+ kernel_list: ['iso', 'aniso']
29
+ kernel_prob: [0.5, 0.5]
30
+ blur_sigma: [1, 15]
31
+ downsample_range: [4, 30]
32
+ noise_range: [0, 20]
33
+ jpeg_range: [30, 80]
34
+
35
+ # color jitter and gray
36
+ color_jitter_prob: 0.3
37
+ color_jitter_shift: 20
38
+ color_jitter_pt_prob: 0.3
39
+ gray_prob: 0.01
40
+
41
+ latent_gt_path: ~ # without pre-calculated latent code
42
+ # latent_gt_path: './experiments/pretrained_models/VQGAN/latent_gt_code1024.pth'
43
+
44
+ # data loader
45
+ num_worker_per_gpu: 2
46
+ batch_size_per_gpu: 4
47
+ dataset_enlarge_ratio: 100
48
+ prefetch_mode: ~
49
+
50
+ # val:
51
+ # name: CelebA-HQ-512
52
+ # type: PairedImageDataset
53
+ # dataroot_lq: datasets/faces/validation/lq
54
+ # dataroot_gt: datasets/faces/validation/gt
55
+ # io_backend:
56
+ # type: disk
57
+ # mean: [0.5, 0.5, 0.5]
58
+ # std: [0.5, 0.5, 0.5]
59
+ # scale: 1
60
+
61
+ # network structures
62
+ network_g:
63
+ type: CodeFormer
64
+ dim_embd: 512
65
+ n_head: 8
66
+ n_layers: 9
67
+ codebook_size: 1024
68
+ connect_list: ['32', '64', '128', '256']
69
+ fix_modules: ['quantize','generator']
70
+ vqgan_path: './experiments/pretrained_models/vqgan/vqgan_code1024.pth' # pretrained VQGAN
71
+
72
+ network_vqgan: # this config is needed if no pre-calculated latent
73
+ type: VQAutoEncoder
74
+ img_size: 512
75
+ nf: 64
76
+ ch_mult: [1, 2, 2, 4, 4, 8]
77
+ quantizer: 'nearest'
78
+ codebook_size: 1024
79
+
80
+ # path
81
+ path:
82
+ pretrain_network_g: ~
83
+ param_key_g: params_ema
84
+ strict_load_g: false
85
+ pretrain_network_d: ~
86
+ strict_load_d: true
87
+ resume_state: ~
88
+
89
+ # base_lr(4.5e-6)*batch_size(4)
90
+ train:
91
+ use_hq_feat_loss: true
92
+ feat_loss_weight: 1.0
93
+ cross_entropy_loss: true
94
+ entropy_loss_weight: 0.5
95
+ fidelity_weight: 0
96
+
97
+ optim_g:
98
+ type: Adam
99
+ lr: !!float 1e-4
100
+ weight_decay: 0
101
+ betas: [0.9, 0.99]
102
+
103
+ scheduler:
104
+ type: MultiStepLR
105
+ milestones: [400000, 450000]
106
+ gamma: 0.5
107
+
108
+ total_iter: 500000
109
+
110
+ warmup_iter: -1 # no warm up
111
+ ema_decay: 0.995
112
+
113
+ use_adaptive_weight: true
114
+
115
+ net_g_start_iter: 0
116
+ net_d_iters: 1
117
+ net_d_start_iter: 0
118
+ manual_seed: 0
119
+
120
+ # validation settings
121
+ val:
122
+ val_freq: !!float 5e10 # no validation
123
+ save_img: true
124
+
125
+ metrics:
126
+ psnr: # metric name, can be arbitrary
127
+ type: calculate_psnr
128
+ crop_border: 4
129
+ test_y_channel: false
130
+
131
+ # logging settings
132
+ logger:
133
+ print_freq: 100
134
+ save_checkpoint_freq: !!float 1e4
135
+ use_tb_logger: true
136
+ wandb:
137
+ project: ~
138
+ resume_id: ~
139
+
140
+ # dist training settings
141
+ dist_params:
142
+ backend: nccl
143
+ port: 29419
144
+
145
+ find_unused_parameters: true
options/CodeFormer_inpainting.yml ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # general settings
2
+ name: CodeFormer_inpainting
3
+ model_type: CodeFormerModel
4
+ num_gpu: 4
5
+ manual_seed: 0
6
+
7
+ # dataset and data loader settings
8
+ datasets:
9
+ train:
10
+ name: FFHQ
11
+ type: FFHQBlindDataset
12
+ dataroot_gt: datasets/ffhq/ffhq_512
13
+ filename_tmpl: '{}'
14
+ io_backend:
15
+ type: disk
16
+
17
+ in_size: 512
18
+ gt_size: 512
19
+ mean: [0.5, 0.5, 0.5]
20
+ std: [0.5, 0.5, 0.5]
21
+ use_hflip: true
22
+ use_corrupt: false
23
+ gen_inpaint_mask: true
24
+
25
+ latent_gt_path: ~ # without pre-calculated latent code
26
+ # latent_gt_path: './experiments/pretrained_models/VQGAN/latent_gt_code1024.pth'
27
+
28
+ # data loader
29
+ num_worker_per_gpu: 2
30
+ batch_size_per_gpu: 3
31
+ dataset_enlarge_ratio: 100
32
+ prefetch_mode: ~
33
+
34
+ # val:
35
+ # name: CelebA-HQ-512
36
+ # type: PairedImageDataset
37
+ # dataroot_lq: datasets/faces/validation/lq
38
+ # dataroot_gt: datasets/faces/validation/gt
39
+ # io_backend:
40
+ # type: disk
41
+ # mean: [0.5, 0.5, 0.5]
42
+ # std: [0.5, 0.5, 0.5]
43
+ # scale: 1
44
+
45
+ # network structures
46
+ network_g:
47
+ type: CodeFormer
48
+ dim_embd: 512
49
+ n_head: 8
50
+ n_layers: 9
51
+ codebook_size: 1024
52
+ connect_list: ['32', '64', '128']
53
+ fix_modules: ['quantize','generator']
54
+ vqgan_path: './experiments/pretrained_models/vqgan/vqgan_code1024.pth' # pretrained VQGAN
55
+
56
+ network_vqgan: # this config is needed if no pre-calculated latent
57
+ type: VQAutoEncoder
58
+ img_size: 512
59
+ nf: 64
60
+ ch_mult: [1, 2, 2, 4, 4, 8]
61
+ quantizer: 'nearest'
62
+ codebook_size: 1024
63
+
64
+ network_d:
65
+ type: VQGANDiscriminator
66
+ nc: 3
67
+ ndf: 64
68
+ n_layers: 4
69
+ model_path: ~
70
+
71
+ # path
72
+ path:
73
+ pretrain_network_g: ~
74
+ param_key_g: params_ema
75
+ strict_load_g: true
76
+ pretrain_network_d: ~
77
+ strict_load_d: true
78
+ resume_state: ~
79
+
80
+ # base_lr(4.5e-6)*batch_size(4)
81
+ train:
82
+ use_hq_feat_loss: true
83
+ feat_loss_weight: 1.0
84
+ cross_entropy_loss: true
85
+ entropy_loss_weight: 0.5
86
+ scale_adaptive_gan_weight: 0.1
87
+ fidelity_weight: 1.0
88
+
89
+ optim_g:
90
+ type: Adam
91
+ lr: !!float 7e-5
92
+ weight_decay: 0
93
+ betas: [0.9, 0.99]
94
+ optim_d:
95
+ type: Adam
96
+ lr: !!float 7e-5
97
+ weight_decay: 0
98
+ betas: [0.9, 0.99]
99
+
100
+ scheduler:
101
+ type: MultiStepLR
102
+ milestones: [250000, 300000]
103
+ gamma: 0.5
104
+
105
+ total_iter: 300000
106
+
107
+ warmup_iter: -1 # no warm up
108
+ ema_decay: 0.997
109
+
110
+ pixel_opt:
111
+ type: L1Loss
112
+ loss_weight: 1.0
113
+ reduction: mean
114
+
115
+ perceptual_opt:
116
+ type: LPIPSLoss
117
+ loss_weight: 1.0
118
+ use_input_norm: true
119
+ range_norm: true
120
+
121
+ gan_opt:
122
+ type: GANLoss
123
+ gan_type: hinge
124
+ loss_weight: !!float 1.0 # adaptive_weighting
125
+
126
+
127
+ use_adaptive_weight: true
128
+
129
+ net_g_start_iter: 0
130
+ net_d_iters: 1
131
+ net_d_start_iter: 296001
132
+ manual_seed: 0
133
+
134
+ # validation settings
135
+ val:
136
+ val_freq: !!float 5e10 # no validation
137
+ save_img: true
138
+
139
+ metrics:
140
+ psnr: # metric name, can be arbitrary
141
+ type: calculate_psnr
142
+ crop_border: 4
143
+ test_y_channel: false
144
+
145
+ # logging settings
146
+ logger:
147
+ print_freq: 100
148
+ save_checkpoint_freq: !!float 1e4
149
+ use_tb_logger: true
150
+ wandb:
151
+ project: ~
152
+ resume_id: ~
153
+
154
+ # dist training settings
155
+ dist_params:
156
+ backend: nccl
157
+ port: 29420
158
+
159
+ find_unused_parameters: true
options/CodeFormer_stage2.yml ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # general settings
2
+ name: CodeFormer_stage2
3
+ model_type: CodeFormerIdxModel
4
+ num_gpu: 8
5
+ manual_seed: 0
6
+
7
+ # dataset and data loader settings
8
+ datasets:
9
+ train:
10
+ name: FFHQ
11
+ type: FFHQBlindDataset
12
+ dataroot_gt: datasets/ffhq/ffhq_512
13
+ filename_tmpl: '{}'
14
+ io_backend:
15
+ type: disk
16
+
17
+ in_size: 512
18
+ gt_size: 512
19
+ mean: [0.5, 0.5, 0.5]
20
+ std: [0.5, 0.5, 0.5]
21
+ use_hflip: true
22
+ use_corrupt: true
23
+
24
+ # large degradation in stageII
25
+ blur_kernel_size: 41
26
+ use_motion_kernel: false
27
+ motion_kernel_prob: 0.001
28
+ kernel_list: ['iso', 'aniso']
29
+ kernel_prob: [0.5, 0.5]
30
+ blur_sigma: [1, 15]
31
+ downsample_range: [4, 30]
32
+ noise_range: [0, 20]
33
+ jpeg_range: [30, 80]
34
+
35
+ latent_gt_path: ~ # without pre-calculated latent code
36
+ # latent_gt_path: './experiments/pretrained_models/VQGAN/latent_gt_code1024.pth'
37
+
38
+ # data loader
39
+ num_worker_per_gpu: 2
40
+ batch_size_per_gpu: 4
41
+ dataset_enlarge_ratio: 100
42
+ prefetch_mode: ~
43
+
44
+ # val:
45
+ # name: CelebA-HQ-512
46
+ # type: PairedImageDataset
47
+ # dataroot_lq: datasets/faces/validation/lq
48
+ # dataroot_gt: datasets/faces/validation/gt
49
+ # io_backend:
50
+ # type: disk
51
+ # mean: [0.5, 0.5, 0.5]
52
+ # std: [0.5, 0.5, 0.5]
53
+ # scale: 1
54
+
55
+ # network structures
56
+ network_g:
57
+ type: CodeFormer
58
+ dim_embd: 512
59
+ n_head: 8
60
+ n_layers: 9
61
+ codebook_size: 1024
62
+ connect_list: ['32', '64', '128', '256']
63
+ fix_modules: ['quantize','generator']
64
+ vqgan_path: './experiments/pretrained_models/vqgan/vqgan_code1024.pth' # pretrained VQGAN
65
+
66
+ network_vqgan: # this config is needed if no pre-calculated latent
67
+ type: VQAutoEncoder
68
+ img_size: 512
69
+ nf: 64
70
+ ch_mult: [1, 2, 2, 4, 4, 8]
71
+ quantizer: 'nearest'
72
+ codebook_size: 1024
73
+
74
+ # path
75
+ path:
76
+ pretrain_network_g: ~
77
+ param_key_g: params_ema
78
+ strict_load_g: false
79
+ pretrain_network_d: ~
80
+ strict_load_d: true
81
+ resume_state: ~
82
+
83
+ # base_lr(4.5e-6)*batch_size(4)
84
+ train:
85
+ use_hq_feat_loss: true
86
+ feat_loss_weight: 1.0
87
+ cross_entropy_loss: true
88
+ entropy_loss_weight: 0.5
89
+ fidelity_weight: 0
90
+
91
+ optim_g:
92
+ type: Adam
93
+ lr: !!float 1e-4
94
+ weight_decay: 0
95
+ betas: [0.9, 0.99]
96
+
97
+ scheduler:
98
+ type: MultiStepLR
99
+ milestones: [400000, 450000]
100
+ gamma: 0.5
101
+
102
+ # scheduler:
103
+ # type: CosineAnnealingRestartLR
104
+ # periods: [500000]
105
+ # restart_weights: [1]
106
+ # eta_min: !!float 2e-5 # no lr reduce in official vqgan code
107
+
108
+ total_iter: 500000
109
+
110
+ warmup_iter: -1 # no warm up
111
+ ema_decay: 0.995
112
+
113
+ use_adaptive_weight: true
114
+
115
+ net_g_start_iter: 0
116
+ net_d_iters: 1
117
+ net_d_start_iter: 0
118
+ manual_seed: 0
119
+
120
+ # validation settings
121
+ val:
122
+ val_freq: !!float 5e10 # no validation
123
+ save_img: true
124
+
125
+ metrics:
126
+ psnr: # metric name, can be arbitrary
127
+ type: calculate_psnr
128
+ crop_border: 4
129
+ test_y_channel: false
130
+
131
+ # logging settings
132
+ logger:
133
+ print_freq: 100
134
+ save_checkpoint_freq: !!float 1e4
135
+ use_tb_logger: true
136
+ wandb:
137
+ project: ~
138
+ resume_id: ~
139
+
140
+ # dist training settings
141
+ dist_params:
142
+ backend: nccl
143
+ port: 29412
144
+
145
+ find_unused_parameters: true
options/CodeFormer_stage3.yml ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # general settings
2
+ name: CodeFormer_stage3
3
+ model_type: CodeFormerJointModel
4
+ num_gpu: 8
5
+ manual_seed: 0
6
+
7
+ # dataset and data loader settings
8
+ datasets:
9
+ train:
10
+ name: FFHQ
11
+ type: FFHQBlindJointDataset
12
+ dataroot_gt: datasets/ffhq/ffhq_512
13
+ filename_tmpl: '{}'
14
+ io_backend:
15
+ type: disk
16
+
17
+ in_size: 512
18
+ gt_size: 512
19
+ mean: [0.5, 0.5, 0.5]
20
+ std: [0.5, 0.5, 0.5]
21
+ use_hflip: true
22
+ use_corrupt: true
23
+
24
+ blur_kernel_size: 41
25
+ use_motion_kernel: false
26
+ motion_kernel_prob: 0.001
27
+ kernel_list: ['iso', 'aniso']
28
+ kernel_prob: [0.5, 0.5]
29
+ # small degradation in stageIII
30
+ blur_sigma: [0.1, 10]
31
+ downsample_range: [1, 12]
32
+ noise_range: [0, 15]
33
+ jpeg_range: [60, 100]
34
+ # large degradation in stageII
35
+ blur_sigma_large: [1, 15]
36
+ downsample_range_large: [4, 30]
37
+ noise_range_large: [0, 20]
38
+ jpeg_range_large: [30, 80]
39
+
40
+ latent_gt_path: ~ # without pre-calculated latent code
41
+ # latent_gt_path: './experiments/pretrained_models/VQGAN/latent_gt_code1024.pth'
42
+
43
+ # data loader
44
+ num_worker_per_gpu: 1
45
+ batch_size_per_gpu: 3
46
+ dataset_enlarge_ratio: 100
47
+ prefetch_mode: ~
48
+
49
+ # val:
50
+ # name: CelebA-HQ-512
51
+ # type: PairedImageDataset
52
+ # dataroot_lq: datasets/faces/validation/lq
53
+ # dataroot_gt: datasets/faces/validation/gt
54
+ # io_backend:
55
+ # type: disk
56
+ # mean: [0.5, 0.5, 0.5]
57
+ # std: [0.5, 0.5, 0.5]
58
+ # scale: 1
59
+
60
+ # network structures
61
+ network_g:
62
+ type: CodeFormer
63
+ dim_embd: 512
64
+ n_head: 8
65
+ n_layers: 9
66
+ codebook_size: 1024
67
+ connect_list: ['32', '64', '128', '256']
68
+ fix_modules: ['quantize','generator']
69
+
70
+ network_vqgan: # this config is needed if no pre-calculated latent
71
+ type: VQAutoEncoder
72
+ img_size: 512
73
+ nf: 64
74
+ ch_mult: [1, 2, 2, 4, 4, 8]
75
+ quantizer: 'nearest'
76
+ codebook_size: 1024
77
+
78
+ network_d:
79
+ type: VQGANDiscriminator
80
+ nc: 3
81
+ ndf: 64
82
+ n_layers: 4
83
+
84
+ # path
85
+ path:
86
+ pretrain_network_g: './experiments/pretrained_models/CodeFormer_stage2/net_g_latest.pth' # pretrained G model in StageII
87
+ param_key_g: params_ema
88
+ strict_load_g: true
89
+ pretrain_network_d: './experiments/pretrained_models/CodeFormer_stage2/net_d_latest.pth' # pretrained D model in StageII
90
+ resume_state: ~
91
+
92
+ # base_lr(4.5e-6)*batch_size(4)
93
+ train:
94
+ use_hq_feat_loss: true
95
+ feat_loss_weight: 1.0
96
+ cross_entropy_loss: true
97
+ entropy_loss_weight: 0.5
98
+ scale_adaptive_gan_weight: 0.1
99
+
100
+ optim_g:
101
+ type: Adam
102
+ lr: !!float 5e-5
103
+ weight_decay: 0
104
+ betas: [0.9, 0.99]
105
+ optim_d:
106
+ type: Adam
107
+ lr: !!float 5e-5
108
+ weight_decay: 0
109
+ betas: [0.9, 0.99]
110
+
111
+ scheduler:
112
+ type: CosineAnnealingRestartLR
113
+ periods: [150000]
114
+ restart_weights: [1]
115
+ eta_min: !!float 2e-5
116
+
117
+
118
+ total_iter: 150000
119
+
120
+ warmup_iter: -1 # no warm up
121
+ ema_decay: 0.997
122
+
123
+ pixel_opt:
124
+ type: L1Loss
125
+ loss_weight: 1.0
126
+ reduction: mean
127
+
128
+ perceptual_opt:
129
+ type: LPIPSLoss
130
+ loss_weight: 1.0
131
+ use_input_norm: true
132
+ range_norm: true
133
+
134
+ gan_opt:
135
+ type: GANLoss
136
+ gan_type: hinge
137
+ loss_weight: !!float 1.0 # adaptive_weighting
138
+
139
+ use_adaptive_weight: true
140
+
141
+ net_g_start_iter: 0
142
+ net_d_iters: 1
143
+ net_d_start_iter: 5001
144
+ manual_seed: 0
145
+
146
+ # validation settings
147
+ val:
148
+ val_freq: !!float 5e10 # no validation
149
+ save_img: true
150
+
151
+ metrics:
152
+ psnr: # metric name, can be arbitrary
153
+ type: calculate_psnr
154
+ crop_border: 4
155
+ test_y_channel: false
156
+
157
+ # logging settings
158
+ logger:
159
+ print_freq: 100
160
+ save_checkpoint_freq: !!float 5e3
161
+ use_tb_logger: true
162
+ wandb:
163
+ project: ~
164
+ resume_id: ~
165
+
166
+ # dist training settings
167
+ dist_params:
168
+ backend: nccl
169
+ port: 29413
170
+
171
+ find_unused_parameters: true
options/VQGAN_512_ds32_nearest_stage1.yml ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # general settings
2
+ name: VQGAN-512-ds32-nearest-stage1
3
+ model_type: VQGANModel
4
+ num_gpu: 8
5
+ manual_seed: 0
6
+
7
+ # dataset and data loader settings
8
+ datasets:
9
+ train:
10
+ name: FFHQ
11
+ type: FFHQBlindDataset
12
+ dataroot_gt: datasets/ffhq/ffhq_512
13
+ filename_tmpl: '{}'
14
+ io_backend:
15
+ type: disk
16
+
17
+ in_size: 512
18
+ gt_size: 512
19
+ mean: [0.5, 0.5, 0.5]
20
+ std: [0.5, 0.5, 0.5]
21
+ use_hflip: true
22
+ use_corrupt: false # for VQGAN
23
+
24
+ # data loader
25
+ num_worker_per_gpu: 2
26
+ batch_size_per_gpu: 4
27
+ dataset_enlarge_ratio: 100
28
+
29
+ prefetch_mode: cpu
30
+ num_prefetch_queue: 4
31
+
32
+ # val:
33
+ # name: CelebA-HQ-512
34
+ # type: PairedImageDataset
35
+ # dataroot_lq: datasets/faces/validation/gt
36
+ # dataroot_gt: datasets/faces/validation/gt
37
+ # io_backend:
38
+ # type: disk
39
+ # mean: [0.5, 0.5, 0.5]
40
+ # std: [0.5, 0.5, 0.5]
41
+ # scale: 1
42
+
43
+ # network structures
44
+ network_g:
45
+ type: VQAutoEncoder
46
+ img_size: 512
47
+ nf: 64
48
+ ch_mult: [1, 2, 2, 4, 4, 8]
49
+ quantizer: 'nearest'
50
+ codebook_size: 1024
51
+
52
+ network_d:
53
+ type: VQGANDiscriminator
54
+ nc: 3
55
+ ndf: 64
56
+
57
+ # path
58
+ path:
59
+ pretrain_network_g: ~
60
+ param_key_g: params_ema
61
+ strict_load_g: true
62
+ pretrain_network_d: ~
63
+ strict_load_d: true
64
+ resume_state: ~
65
+
66
+ # base_lr(4.5e-6)*batch_size(4)
67
+ train:
68
+ optim_g:
69
+ type: Adam
70
+ lr: !!float 7e-5
71
+ weight_decay: 0
72
+ betas: [0.9, 0.99]
73
+ optim_d:
74
+ type: Adam
75
+ lr: !!float 7e-5
76
+ weight_decay: 0
77
+ betas: [0.9, 0.99]
78
+
79
+ scheduler:
80
+ type: CosineAnnealingRestartLR
81
+ periods: [1600000]
82
+ restart_weights: [1]
83
+ eta_min: !!float 6e-5 # no lr reduce in official vqgan code
84
+
85
+ total_iter: 1600000
86
+
87
+ warmup_iter: -1 # no warm up
88
+ ema_decay: 0.995 # GFPGAN: 0.5**(32 / (10 * 1000)) == 0.998; Unleashing: 0.995
89
+
90
+ pixel_opt:
91
+ type: L1Loss
92
+ loss_weight: 1.0
93
+ reduction: mean
94
+
95
+ perceptual_opt:
96
+ type: LPIPSLoss
97
+ loss_weight: 1.0
98
+ use_input_norm: true
99
+ range_norm: true
100
+
101
+ gan_opt:
102
+ type: GANLoss
103
+ gan_type: hinge
104
+ loss_weight: !!float 1.0 # adaptive_weighting
105
+
106
+ net_g_start_iter: 0
107
+ net_d_iters: 1
108
+ net_d_start_iter: 30001
109
+ manual_seed: 0
110
+
111
+ # validation settings
112
+ val:
113
+ val_freq: !!float 5e10 # no validation
114
+ save_img: true
115
+
116
+ metrics:
117
+ psnr: # metric name, can be arbitrary
118
+ type: calculate_psnr
119
+ crop_border: 4
120
+ test_y_channel: false
121
+
122
+ # logging settings
123
+ logger:
124
+ print_freq: 100
125
+ save_checkpoint_freq: !!float 1e4
126
+ use_tb_logger: true
127
+ wandb:
128
+ project: ~
129
+ resume_id: ~
130
+
131
+ # dist training settings
132
+ dist_params:
133
+ backend: nccl
134
+ port: 29411
135
+
136
+ find_unused_parameters: true