{
    "seed": 1,
    "decoder": {
        "unets": [
            {
                "dim": 32,
                "cond_dim": 512,
                "image_embed_dim": 768,
                "text_embed_dim": 768,
                "cond_on_text_encodings": true,
                "channels": 3,
                "dim_mults": [1, 2, 3, 4],
                "num_resnet_blocks": 4,
                "attn_heads": 8,
                "attn_dim_head": 64,
                "sparse_attn": true,
                "memory_efficient": true,
                "self_attn": [false, true, true, true]
            },
            {
                "dim": 32,
                "cond_dim": 512,
                "image_embed_dim": 768,
                "text_embed_dim": 768,
                "cond_on_text_encodings": true,
                "channels": 3,
                "dim_mults": [1, 2, 3, 4],
                "num_resnet_blocks": 4,
                "attn_heads": 8,
                "attn_dim_head": 64,
                "sparse_attn": true,
                "memory_efficient": true,
                "self_attn": [false, true, true, true]
            },
            {
                "dim": 192,
                "cond_dim": 512,
                "image_embed_dim": 768,
                "text_embed_dim": 768,
                "cond_on_text_encodings": true,
                "init_cross_embed": false,
                "channels": 3,
                "dim_mults": [1, 2, 3, 4],
                "num_resnet_blocks": 3,
                "attn_heads": 8,
                "attn_dim_head": 64,
                "sparse_attn": false,
                "memory_efficient": true,
                "self_attn": [false, false, false, false]
            }
        ],
        "clip": {
            "make": "openai",
            "model": "ViT-L/14"
        },
        "image_sizes": [64, 256, 1024],
        "random_crop_sizes": [null, null, 256],
        "channels": 3,
        "timesteps": 1000,
        "loss_type": "l2",
        "beta_schedule": ["cosine", "cosine", "cosine"],
        "learned_variance": true,
        "text_cond_drop_prob": 0.0,
        "image_cond_drop_prob": 0.0
    },
    "data": {
        "webdataset_base_url": "pipe:aws s3 cp --quiet s3://s-datasets/laion-high-resolution/{}.tar -",
        "num_workers": 6,
        "batch_size": 8,
        "start_shard": 0,
        "end_shard": 17535,
        "shard_width": 5,
        "index_width": 4,
        "splits": {
            "train": 0.75,
            "val": 0.15,
            "test": 0.1
        },
        "shuffle_train": false,
        "resample_train": true,
        "preprocessing": {
            "RandomResizedCrop": {
                "size": [1024, 1024],
                "scale": [0.75, 1.0],
                "ratio": [1.0, 1.0]
            },
            "ToTensor": true
        }
    },
    "train": {
        "epochs": 1000,
        "lr": 1.2e-4,
        "wd": 0.0,
        "max_grad_norm": 0.5,
        "save_every_n_samples": 2000000,
        "n_sample_images": 2,
        "device": "cuda:0",
        "epoch_samples": 10000000,
        "validation_samples": 100000,
        "use_ema": true,
        "ema_beta": 0.9999,
        "unet_training_mask": [false, false, true]
    },
    "evaluate": {
        "n_evaluation_samples": 2,
        "FID": {
            "feature": 64
        },
        "LPIPS": {
            "net_type": "vgg",
            "reduction": "mean"
        }
    },
    "tracker": {
        "data_path": "/fsx/aidan/new/multinode/experiments/decoder_1024/.tracker-data",
        "overwrite_data_path": true,
        "log": {
            "log_type": "wandb",
            "wandb_entity": "Veldrovive",
            "wandb_project": "upsamplers_1024",
            "wandb_resume": false,
            "auto_resume": true,
            "verbose": true
        },
        "load": {
            "load_from": null,
            "only_auto_resume": true,
            "file_path": "/fsx/aidan/new/multinode/experiments/decoder_1024/models/checkpoints/latest.pth"
        },
        "save": [
            {
                "save_to": "huggingface",
                "huggingface_repo": "laion/DALLE2-PyTorch",
                "save_meta_to": "upsampler/1024/v1.0.3/",
                "save_latest_to": "upsampler/1024/v1.0.3/latest.pth",
                "save_type": "model"
            },
            {
                "save_to": "huggingface",
                "huggingface_repo": "laion/DALLE2-PyTorch",
                "save_latest_to": "upsampler/1024/v1.0.2/checkpoints/latest.pth",
                "save_type": "checkpoint"
            },
            {
                "save_to": "local",
                "save_latest_to": "/fsx/aidan/new/multinode/experiments/decoder_1024/models/checkpoints/latest.pth",
                "save_type": "checkpoint"
            }
        ]
    }
}