{
  "seed": 42,
  "output_dir": "work_dirs/baseline",
  "model": {
    "img_size": 256,
    "input_range": "minus_one_one",
    "num_classes": 1000,
    "encoder_type": "rectok",
    "encoder_model_size": "base",
    "encoder_patch_size": 16,
    "token_channels": 128,
    "mask_ratio": 0.4,
    "mask_ratio_min": -0.1,
    "mask_ratio_type": "random",
    "use_qknorm_encoder": false,
    "latent_hw": 16,
    "decoder_model": "JiTCoT-B/16",
    "decoder_patch_size": 16,
    "bottleneck_dim_latent": 128,
    "dh_depth": 2,
    "dh_hidden_size": 1024,
    "attn_dropout": 0.0,
    "proj_dropout": 0.0,
    "enable_ema": true,
    "ema_decay1": 0.9999,
    "ema_decay2": 0.9998,
    "label_drop_prob": 0.1,
    "P_mean": -0.4,
    "P_std": 0.8,
    "latent_mean": -1.2,
    "latent_std": 1.0,
    "latent_weight": 1.0,
    "choose_latent_p": 0.4,
    "perceptual_weight": 1.0,
    "perceptual_net": "lpips-convnext_s-1.0-0.1",
    "sample_mode": "latent_first_cascaded_noised",
    "latent_max_t": 1.0,
    "latent_pixel_offset": 0.0,
    "latent_pixel_shift": 1.0,
    "t_eps": 0.05,
    "t_eps_inference": 0.05,
    "noise_scale": 1.0,
    "sampling_method": "heun",
    "num_sampling_steps": 50,
    "cfg": 1.0,
    "cfg_latent": 1.0,
    "interval_min": 0.0,
    "interval_max": 1.0,
    "interval_min_latent": 0.0,
    "interval_max_latent": 1.0,
    "gen_shift_pixel": 1.0,
    "gen_shift_latent": 1.0,
    "guidance_method": "cfg"
  },
  "data": {
    "train_dir": "data/imagenet/train",
    "val_dir": "data/imagenet/val",
    "num_workers": 8,
    "pin_memory": true,
    "persistent_workers": true
  },
  "train": {
    "epochs": 200,
    "global_batch_size": 1024,
    "eval_global_batch_size": 1024,
    "grad_accum_steps": 1,
    "grad_clip": 3.0,
    "amp_dtype": "bf16",
    "log_interval": 50
  },
  "visualization": {
    "initial_visualization": true,
    "vis_interval": 100,
    "visualize_reconstruction": true,
    "visualize_generation": true
  },
  "eval": {
    "initial_eval": {
      "reconstruction": false,
      "generation": false
    },
    "gfid_interval": 10,
    "rfid_interval": 10,
    "gfid_stats_path": "fid_stats/jit_in256_stats.npz",
    "rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz",
    "inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth",
    "gfid_backend": "online",
    "gfid_num_classes": 1000,
    "gfid_num_images": 50000,
    "rfid_num_images": 50000,
    "batch_size": 64,
    "num_workers": 8,
    "gfid_metric_verbose": false,
    "gfid_keep_images": false,
    "gfid_cfg_scale": null,
    "gfid_cfg_scale_latent": null,
    "gfid_cfg_interval": null,
    "gfid_cfg_interval_latent": null,
    "gfid_steps": null,
    "eval_ema": "1"
  },
  "optim": {
    "name": "adamw",
    "lr": 0.0001,
    "lr_schedule": "constant",
    "weight_decay": 0.0,
    "betas": [
      0.9,
      0.95
    ],
    "min_lr": 1e-06,
    "warmup_epochs": 5
  },
  "checkpoint": {
    "resume": "",
    "auto_resume": true,
    "save_interval": 1,
    "keep_last": 3
  },
  "logging": {
    "enable_wandb": false,
    "entity": "",
    "project": "diffusion-decoder",
    "run_name": "diffusion_decoder_imagenet256"
  }
}