| { | |
| "dataset": { | |
| "batch_duplicates": 1, | |
| "cache_dataset": true, | |
| "cropping": "none", | |
| "eval_split": "test", | |
| "flipping": "left_right", | |
| "image_size": 32, | |
| "name": "object_recognition", | |
| "num_classes": 10, | |
| "tfds_name": "cifar10", | |
| "train_split": "train" | |
| }, | |
| "datasets": [ | |
| { | |
| "batch_duplicates": 1, | |
| "cache_dataset": true, | |
| "cropping": "none", | |
| "eval_split": "test", | |
| "flipping": "left_right", | |
| "image_size": 32, | |
| "name": "object_recognition", | |
| "num_classes": 10, | |
| "tfds_name": "cifar10", | |
| "train_split": "train" | |
| } | |
| ], | |
| "eval": { | |
| "batch_size": 64, | |
| "checkpoint_dir": "", | |
| "steps": 2, | |
| "tag": "eval", | |
| "write_images_to_file": true | |
| }, | |
| "model": { | |
| "arch_name": "tape", | |
| "b_scale": 1.0, | |
| "b_type": "uint8", | |
| "cond_decoupled_read": false, | |
| "cond_dim": 0, | |
| "cond_dropout": 0.0, | |
| "cond_on_latent": true, | |
| "cond_proj": true, | |
| "cond_tape_writable": false, | |
| "conditional": "class", | |
| "conv_drop_units": 0.0, | |
| "conv_kernel_size": 0, | |
| "drop_att": 0.0, | |
| "drop_path": 0.1, | |
| "drop_sc": 0.0, | |
| "drop_units": 0.1, | |
| "flip_rate": 0.0, | |
| "guidance": 0.0, | |
| "infer_iterations": 100, | |
| "infer_schedule": "cosine", | |
| "latent_dim": 512, | |
| "latent_mlp_ratio": 4, | |
| "latent_num_heads": 16, | |
| "latent_pos_encoding": "learned", | |
| "latent_slots": 128, | |
| "loss_type": "eps", | |
| "name": "image_diffusion_model", | |
| "normalize_noisy_input": false, | |
| "num_layers": "2,2,2", | |
| "patch_size": 2, | |
| "pos_encoding": "sin_cos", | |
| "pred_type": "eps", | |
| "pretrained_ckpt": "", | |
| "rw_num_heads": 8, | |
| "sampler_name": "ddpm", | |
| "self_cond": "latent", | |
| "self_cond_by_masking": true, | |
| "self_cond_rate": 0.9, | |
| "tape_dim": 256, | |
| "tape_mlp_ratio": 2, | |
| "tape_pos_encoding": "learned", | |
| "td": 0.0, | |
| "time_on_latent": true, | |
| "time_scaling": 1000, | |
| "train_schedule": "sigmoid@-3,3,0.9", | |
| "use_cls_token": false, | |
| "x0_clip": "auto", | |
| "xattn_enc_ln": false | |
| }, | |
| "model_dir": "results/cifar10", | |
| "optimization": { | |
| "beta1": 0.9, | |
| "beta2": 0.999, | |
| "ema_decay": 0.9999, | |
| "ema_name_exact_match": true, | |
| "end_lr_factor": 0.0, | |
| "eps": 1e-08, | |
| "exclude_from_weight_decay": "bias,beta,gamma", | |
| "global_clipnorm": 1.0, | |
| "learning_rate": 0.003, | |
| "learning_rate_scaling": "none", | |
| "learning_rate_schedule": "cosine@0.8", | |
| "momentum": 0.9, | |
| "optimizer": "lamb", | |
| "tail_steps": 0, | |
| "warmup_epochs": 0, | |
| "warmup_steps": 10000, | |
| "weight_decay": 0.01 | |
| }, | |
| "task": { | |
| "name": "image_generation", | |
| "weight": 1.0 | |
| }, | |
| "tasks": [ | |
| { | |
| "name": "image_generation", | |
| "weight": 1.0 | |
| } | |
| ], | |
| "train": { | |
| "batch_size": 256, | |
| "checkpoint_epochs": 40, | |
| "checkpoint_steps": 1000, | |
| "epochs": 100, | |
| "keep_checkpoint_max": 10, | |
| "label_smoothing": 0.0, | |
| "steps": 150000 | |
| } | |
| } |