diffusion-decoder / baseline /config.resolved.json
QingyuShi's picture
Upload files excluding .pt
5d8c7a5 verified
Raw
History Blame Contribute Delete
3.19 kB
{
"seed": 42,
"output_dir": "work_dirs/baseline",
"model": {
"img_size": 256,
"input_range": "minus_one_one",
"num_classes": 1000,
"encoder_type": "rectok",
"encoder_model_size": "base",
"encoder_patch_size": 16,
"token_channels": 128,
"mask_ratio": 0.4,
"mask_ratio_min": -0.1,
"mask_ratio_type": "random",
"use_qknorm_encoder": false,
"latent_hw": 16,
"decoder_model": "JiTCoT-B/16",
"decoder_patch_size": 16,
"bottleneck_dim_latent": 128,
"dh_depth": 2,
"dh_hidden_size": 1024,
"attn_dropout": 0.0,
"proj_dropout": 0.0,
"enable_ema": true,
"ema_decay1": 0.9999,
"ema_decay2": 0.9998,
"label_drop_prob": 0.1,
"P_mean": -0.4,
"P_std": 0.8,
"latent_mean": -1.2,
"latent_std": 1.0,
"latent_weight": 1.0,
"choose_latent_p": 0.4,
"perceptual_weight": 1.0,
"perceptual_net": "lpips-convnext_s-1.0-0.1",
"sample_mode": "latent_first_cascaded_noised",
"latent_max_t": 1.0,
"latent_pixel_offset": 0.0,
"latent_pixel_shift": 1.0,
"t_eps": 0.05,
"t_eps_inference": 0.05,
"noise_scale": 1.0,
"sampling_method": "heun",
"num_sampling_steps": 50,
"cfg": 1.0,
"cfg_latent": 1.0,
"interval_min": 0.0,
"interval_max": 1.0,
"interval_min_latent": 0.0,
"interval_max_latent": 1.0,
"gen_shift_pixel": 1.0,
"gen_shift_latent": 1.0,
"guidance_method": "cfg"
},
"data": {
"train_dir": "data/imagenet/train",
"val_dir": "data/imagenet/val",
"num_workers": 8,
"pin_memory": true,
"persistent_workers": true
},
"train": {
"epochs": 200,
"global_batch_size": 1024,
"eval_global_batch_size": 1024,
"grad_accum_steps": 1,
"grad_clip": 3.0,
"amp_dtype": "bf16",
"log_interval": 50
},
"visualization": {
"initial_visualization": true,
"vis_interval": 100,
"visualize_reconstruction": true,
"visualize_generation": true
},
"eval": {
"initial_eval": {
"reconstruction": false,
"generation": false
},
"gfid_interval": 10,
"rfid_interval": 10,
"gfid_stats_path": "fid_stats/jit_in256_stats.npz",
"rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz",
"inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth",
"gfid_backend": "online",
"gfid_num_classes": 1000,
"gfid_num_images": 50000,
"rfid_num_images": 50000,
"batch_size": 64,
"num_workers": 8,
"gfid_metric_verbose": false,
"gfid_keep_images": false,
"gfid_cfg_scale": null,
"gfid_cfg_scale_latent": null,
"gfid_cfg_interval": null,
"gfid_cfg_interval_latent": null,
"gfid_steps": null,
"eval_ema": "1"
},
"optim": {
"name": "adamw",
"lr": 0.0001,
"lr_schedule": "constant",
"weight_decay": 0.0,
"betas": [
0.9,
0.95
],
"min_lr": 1e-06,
"warmup_epochs": 5
},
"checkpoint": {
"resume": "",
"auto_resume": true,
"save_interval": 1,
"keep_last": 3
},
"logging": {
"enable_wandb": false,
"entity": "",
"project": "diffusion-decoder",
"run_name": "diffusion_decoder_imagenet256"
}
}