dyflional10
/

sae

Model card Files and versions

xet

Community

dyflional10 committed 5 days ago

Commit

c514883

verified ·

1 Parent(s): 7c5ab85

Upload configs/main.yaml with huggingface_hub

Browse files

Files changed (1) hide show

configs/main.yaml +146 -0

configs/main.yaml ADDED Viewed

	@@ -0,0 +1,146 @@

+__object__:
+  path: projects.video_vae_v3.train_image
+  name: ImageAutoencoderAdversarialTrainer
+data:
+  training:
+    img_256:
+      path: data.image.configs.vae.t2i_good
+      resolution: 256
+      format: tensor
+      batch_size: 16
+      sample_weight: 0.25
+      num_workers: 8
+      prefetch_factor: 16
+  validation:
+    path: data.image.configs.benchmark.coco_2017
+    resolution: 256
+    format: pil
+    batch_size: 3
+    num_workers: 2
+    prefetch_factor: 16
+vae:
+  ema:
+    decay: 0.9995
+    interval: 1
+    on_cpu: false
+  model:
+    __object__:
+      path: models.dino_v3.image_vae_dinov3_encode
+      name: AutoencoderKL
+      args: as_params
+    enc_block_out_channels:
+    - 128
+    - 256
+    - 384
+    - 512
+    - 768
+    dec_block_out_channels:
+    - 1280
+    - 1024
+    - 512
+    - 256
+    - 128
+    enc_layers_per_block: 2
+    dec_layers_per_block: 3
+    in_channels: 3
+    latent_channels: 1280
+    out_channels: 3
+    use_quant_conv: false
+    use_post_quant_conv: false
+    spatial_downsample_factor: 16
+    variational: false
+    running_mode: enc_dec
+    noise_tau: 0.8
+    denormalize_decoder_output: true
+    random_masking_channel_ratio: 0.0
+  running_mode: init_loading_whole_ckpt
+  pretrained_pths: hdfs://haruna/home/byte_data_seed/hdd_hldy/user/ming.li1/work_dirs/vfm_exp/ming_dino_vae_exp/dino-vae_decoder-only_noise-0.8_denormalize/states/0000485000/
+dis:
+  model:
+    __object__:
+      path: models.video_vae_v4.modules.discriminators.patchgan
+      name: PatchDiscriminator2d
+      args: as_params
+    in_channels: 3
+  sync_batch_norm: false
+lpips:
+  net: vgg
+  pretrained: true
+training:
+  project: ming_dino_vae_exp
+  name: 1206_sae-e2e_from_pt_w_noise_0.8_denormalize_kl500
+  total_steps: 1500000
+  warmup_steps: 5000
+  precision: tf32
+  memory_format: channels_first
+  compile: false
+  seed: 0
+  lr_scheduler:
+  - type: linear
+    steps: 5000
+    start: 0
+    end: 5.0e-06
+  - type: cosine
+    steps: 1495000
+    start: 5.0e-06
+    end: 1.0e-07
+  gradient_accumulation: 1
+loss:
+  l1_weight: 100
+  lpips_weight: 100
+  kl_weight: 1.0e-06
+  gan_weight: 1
+  r1_weight: 0.0
+  lecam_weight: 0.0
+  dis_type: nonsaturating
+  ref_kl_weight: 500
+optimizer:
+  lr: 5.0e-06
+  fused: true
+  betas:
+  - 0.5
+  - 0.9
+  weight_decay: 0
+  parameters_as_bucket_view: true
+writer:
+  wandb:
+    project: ming_dino_vae_exp
+    name: 1206_sae-e2e_from_pt_w_noise_0.8_denormalize_kl500
+  interval:
+    loss: 100
+    image: 1000
+persistence:
+  interval: 5000
+  path: hdfs://haruna/home/byte_data_seed/hdd_hldy/user/ming.li1/work_dirs/vfm_exp/ming_dino_vae_exp/1206_sae-e2e_from_pt_w_noise_0.8_denormalize_kl500
+  override: true
+evaluation:
+  interval: 5000
+  metrics:
+    fid:
+      __object__:
+        path: common.evaluation.metrics
+        name: FID
+        args: as_params
+      extractor_path: hdfs://haruna/home/byte_uslab_cvg_lq/user/zhaoyang/pretrained_models/weights-inception-2015-12-05-6726825d.pth
+      resolution: 299
+      target_info_path: null
+      normalize: true
+      interpolation: bilinear
+    psnr:
+      __object__:
+        path: torchmetrics.image
+        name: PeakSignalNoiseRatio
+        args: as_params
+    lpips:
+      __object__:
+        path: common.evaluation.metrics
+        name: LPIPS
+        args: as_params
+      net_type: vgg
+      normalize: true
+      extractor_path: hdfs://haruna/home/byte_seed_vgfm/pretrained_models/vgg16-397923af.pth
+ae:
+  shift_factor: -0.001972413854673505
+  scale_factor: 0.5329070091247559
+  ema_shift_factor: -0.0019670347683131695
+  ema_scale_factor: 0.247765451669693