dyflional10 committed on
Commit
c514883
·
verified ·
1 Parent(s): 7c5ab85

Upload configs/main.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. configs/main.yaml +146 -0
configs/main.yaml ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __object__:
2
+ path: projects.video_vae_v3.train_image
3
+ name: ImageAutoencoderAdversarialTrainer
4
+ data:
5
+ training:
6
+ img_256:
7
+ path: data.image.configs.vae.t2i_good
8
+ resolution: 256
9
+ format: tensor
10
+ batch_size: 16
11
+ sample_weight: 0.25
12
+ num_workers: 8
13
+ prefetch_factor: 16
14
+ validation:
15
+ path: data.image.configs.benchmark.coco_2017
16
+ resolution: 256
17
+ format: pil
18
+ batch_size: 3
19
+ num_workers: 2
20
+ prefetch_factor: 16
21
+ vae:
22
+ ema:
23
+ decay: 0.9995
24
+ interval: 1
25
+ on_cpu: false
26
+ model:
27
+ __object__:
28
+ path: models.dino_v3.image_vae_dinov3_encode
29
+ name: AutoencoderKL
30
+ args: as_params
31
+ enc_block_out_channels:
32
+ - 128
33
+ - 256
34
+ - 384
35
+ - 512
36
+ - 768
37
+ dec_block_out_channels:
38
+ - 1280
39
+ - 1024
40
+ - 512
41
+ - 256
42
+ - 128
43
+ enc_layers_per_block: 2
44
+ dec_layers_per_block: 3
45
+ in_channels: 3
46
+ latent_channels: 1280
47
+ out_channels: 3
48
+ use_quant_conv: false
49
+ use_post_quant_conv: false
50
+ spatial_downsample_factor: 16
51
+ variational: false
52
+ running_mode: enc_dec
53
+ noise_tau: 0.8
54
+ denormalize_decoder_output: true
55
+ random_masking_channel_ratio: 0.0
56
+ running_mode: init_loading_whole_ckpt
57
+ pretrained_pths: hdfs://haruna/home/byte_data_seed/hdd_hldy/user/ming.li1/work_dirs/vfm_exp/ming_dino_vae_exp/dino-vae_decoder-only_noise-0.8_denormalize/states/0000485000/
58
+ dis:
59
+ model:
60
+ __object__:
61
+ path: models.video_vae_v4.modules.discriminators.patchgan
62
+ name: PatchDiscriminator2d
63
+ args: as_params
64
+ in_channels: 3
65
+ sync_batch_norm: false
66
+ lpips:
67
+ net: vgg
68
+ pretrained: true
69
+ training:
70
+ project: ming_dino_vae_exp
71
+ name: 1206_sae-e2e_from_pt_w_noise_0.8_denormalize_kl500
72
+ total_steps: 1500000
73
+ warmup_steps: 5000
74
+ precision: tf32
75
+ memory_format: channels_first
76
+ compile: false
77
+ seed: 0
78
+ lr_scheduler:
79
+ - type: linear
80
+ steps: 5000
81
+ start: 0
82
+ end: 5.0e-06
83
+ - type: cosine
84
+ steps: 1495000
85
+ start: 5.0e-06
86
+ end: 1.0e-07
87
+ gradient_accumulation: 1
88
+ loss:
89
+ l1_weight: 100
90
+ lpips_weight: 100
91
+ kl_weight: 1.0e-06
92
+ gan_weight: 1
93
+ r1_weight: 0.0
94
+ lecam_weight: 0.0
95
+ dis_type: nonsaturating
96
+ ref_kl_weight: 500
97
+ optimizer:
98
+ lr: 5.0e-06
99
+ fused: true
100
+ betas:
101
+ - 0.5
102
+ - 0.9
103
+ weight_decay: 0
104
+ parameters_as_bucket_view: true
105
+ writer:
106
+ wandb:
107
+ project: ming_dino_vae_exp
108
+ name: 1206_sae-e2e_from_pt_w_noise_0.8_denormalize_kl500
109
+ interval:
110
+ loss: 100
111
+ image: 1000
112
+ persistence:
113
+ interval: 5000
114
+ path: hdfs://haruna/home/byte_data_seed/hdd_hldy/user/ming.li1/work_dirs/vfm_exp/ming_dino_vae_exp/1206_sae-e2e_from_pt_w_noise_0.8_denormalize_kl500
115
+ override: true
116
+ evaluation:
117
+ interval: 5000
118
+ metrics:
119
+ fid:
120
+ __object__:
121
+ path: common.evaluation.metrics
122
+ name: FID
123
+ args: as_params
124
+ extractor_path: hdfs://haruna/home/byte_uslab_cvg_lq/user/zhaoyang/pretrained_models/weights-inception-2015-12-05-6726825d.pth
125
+ resolution: 299
126
+ target_info_path: null
127
+ normalize: true
128
+ interpolation: bilinear
129
+ psnr:
130
+ __object__:
131
+ path: torchmetrics.image
132
+ name: PeakSignalNoiseRatio
133
+ args: as_params
134
+ lpips:
135
+ __object__:
136
+ path: common.evaluation.metrics
137
+ name: LPIPS
138
+ args: as_params
139
+ net_type: vgg
140
+ normalize: true
141
+ extractor_path: hdfs://haruna/home/byte_seed_vgfm/pretrained_models/vgg16-397923af.pth
142
+ ae:
143
+ shift_factor: -0.001972413854673505
144
+ scale_factor: 0.5329070091247559
145
+ ema_shift_factor: -0.0019670347683131695
146
+ ema_scale_factor: 0.247765451669693