{ "seed": 42, "output_dir": "work_dirs/baseline", "model": { "img_size": 256, "input_range": "minus_one_one", "num_classes": 1000, "encoder_type": "rectok", "encoder_model_size": "base", "encoder_patch_size": 16, "token_channels": 128, "mask_ratio": 0.4, "mask_ratio_min": -0.1, "mask_ratio_type": "random", "use_qknorm_encoder": false, "latent_hw": 16, "decoder_model": "JiTCoT-B/16", "decoder_patch_size": 16, "bottleneck_dim_latent": 128, "dh_depth": 2, "dh_hidden_size": 1024, "attn_dropout": 0.0, "proj_dropout": 0.0, "enable_ema": true, "ema_decay1": 0.9999, "ema_decay2": 0.9998, "label_drop_prob": 0.1, "P_mean": -0.4, "P_std": 0.8, "latent_mean": -1.2, "latent_std": 1.0, "latent_weight": 1.0, "choose_latent_p": 0.4, "perceptual_weight": 1.0, "perceptual_net": "lpips-convnext_s-1.0-0.1", "sample_mode": "latent_first_cascaded_noised", "latent_max_t": 1.0, "latent_pixel_offset": 0.0, "latent_pixel_shift": 1.0, "t_eps": 0.05, "t_eps_inference": 0.05, "noise_scale": 1.0, "sampling_method": "heun", "num_sampling_steps": 50, "cfg": 1.0, "cfg_latent": 1.0, "interval_min": 0.0, "interval_max": 1.0, "interval_min_latent": 0.0, "interval_max_latent": 1.0, "gen_shift_pixel": 1.0, "gen_shift_latent": 1.0, "guidance_method": "cfg" }, "data": { "train_dir": "data/imagenet/train", "val_dir": "data/imagenet/val", "num_workers": 8, "pin_memory": true, "persistent_workers": true }, "train": { "epochs": 200, "global_batch_size": 1024, "eval_global_batch_size": 1024, "grad_accum_steps": 1, "grad_clip": 3.0, "amp_dtype": "bf16", "log_interval": 50 }, "visualization": { "initial_visualization": true, "vis_interval": 100, "visualize_reconstruction": true, "visualize_generation": true }, "eval": { "initial_eval": { "reconstruction": false, "generation": false }, "gfid_interval": 10, "rfid_interval": 10, "gfid_stats_path": "fid_stats/jit_in256_stats.npz", "rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz", "inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth", "gfid_backend": "online", "gfid_num_classes": 1000, "gfid_num_images": 50000, "rfid_num_images": 50000, "batch_size": 64, "num_workers": 8, "gfid_metric_verbose": false, "gfid_keep_images": false, "gfid_cfg_scale": null, "gfid_cfg_scale_latent": null, "gfid_cfg_interval": null, "gfid_cfg_interval_latent": null, "gfid_steps": null, "eval_ema": "1" }, "optim": { "name": "adamw", "lr": 0.0001, "lr_schedule": "constant", "weight_decay": 0.0, "betas": [ 0.9, 0.95 ], "min_lr": 1e-06, "warmup_epochs": 5 }, "checkpoint": { "resume": "", "auto_resume": true, "save_interval": 1, "keep_last": 3 }, "logging": { "enable_wandb": false, "entity": "", "project": "diffusion-decoder", "run_name": "diffusion_decoder_imagenet256" } }