Add model checkpoint and config files
Browse files- checkpoint.pt +3 -0
- checkpoint_.pt +3 -0
- config.yaml +106 -0
- log.txt +0 -0
- model.txt +47 -0
checkpoint.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a5262e8184d15d3876a7376e559e6a5643cad669864495687214397133875d3
|
| 3 |
+
size 10801103890
|
checkpoint_.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a5262e8184d15d3876a7376e559e6a5643cad669864495687214397133875d3
|
| 3 |
+
size 10801103890
|
config.yaml
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run_dir: .exp/diffusion/imagenet_512/dc_ae_f32c32_in_1.0/dit_xl_1/bs_1024_lr_2e-4_fp16
|
| 2 |
+
seed: 0
|
| 3 |
+
allow_tf32: true
|
| 4 |
+
resolution: 512
|
| 5 |
+
amp: fp16
|
| 6 |
+
cfg_scale: 1.0
|
| 7 |
+
evaluate_split: test
|
| 8 |
+
evaluate_dir_name: null
|
| 9 |
+
num_save_images: 64
|
| 10 |
+
save_all_images: false
|
| 11 |
+
save_images_at_all_procs: false
|
| 12 |
+
evaluate_dataset: sample_class
|
| 13 |
+
sample_class:
|
| 14 |
+
name: sample_class
|
| 15 |
+
batch_size: 32
|
| 16 |
+
n_worker: 8
|
| 17 |
+
train_drop_last: true
|
| 18 |
+
seed: 0
|
| 19 |
+
num_classes: 1000
|
| 20 |
+
num_samples: 50000
|
| 21 |
+
autoencoder: dc-ae-f32c32-in-1.0
|
| 22 |
+
autoencoder_dtype: fp32
|
| 23 |
+
scaling_factor: 0.3189
|
| 24 |
+
model: dit
|
| 25 |
+
dit:
|
| 26 |
+
name: DiT
|
| 27 |
+
input_size: 16
|
| 28 |
+
patch_size: 1
|
| 29 |
+
in_channels: 32
|
| 30 |
+
hidden_size: 1152
|
| 31 |
+
depth: 28
|
| 32 |
+
num_heads: 16
|
| 33 |
+
mlp_ratio: 4.0
|
| 34 |
+
post_norm: false
|
| 35 |
+
class_dropout_prob: 0.1
|
| 36 |
+
num_classes: 1000
|
| 37 |
+
learn_sigma: true
|
| 38 |
+
unconditional: false
|
| 39 |
+
use_checkpoint: true
|
| 40 |
+
pretrained_path: null
|
| 41 |
+
pretrained_source: dc-ae
|
| 42 |
+
eval_scheduler: GaussianDiffusion
|
| 43 |
+
num_inference_steps: 250
|
| 44 |
+
train_scheduler: GaussianDiffusion
|
| 45 |
+
uvit:
|
| 46 |
+
name: UViT
|
| 47 |
+
input_size: 32
|
| 48 |
+
patch_size: 2
|
| 49 |
+
in_channels: 4
|
| 50 |
+
hidden_size: 1152
|
| 51 |
+
depth: 28
|
| 52 |
+
num_heads: 16
|
| 53 |
+
mlp_ratio: 4.0
|
| 54 |
+
mlp_time_embed: false
|
| 55 |
+
qkv_bias: false
|
| 56 |
+
act_layer: gelu
|
| 57 |
+
use_checkpoint: true
|
| 58 |
+
class_dropout_prob: 0.1
|
| 59 |
+
num_classes: 1000
|
| 60 |
+
pretrained_path: null
|
| 61 |
+
pretrained_source: dc-ae
|
| 62 |
+
eval_scheduler: DPM_Solver
|
| 63 |
+
num_inference_steps: 30
|
| 64 |
+
train_scheduler: DPM_Solver
|
| 65 |
+
attn_mode: null
|
| 66 |
+
compute_fid: true
|
| 67 |
+
fid:
|
| 68 |
+
save_path: null
|
| 69 |
+
ref_path: assets/data/fid/imagenet_512_train.npz
|
| 70 |
+
compute_inception_score: true
|
| 71 |
+
inception_score: {}
|
| 72 |
+
train_dataset: latent_imagenet
|
| 73 |
+
latent_imagenet:
|
| 74 |
+
name: latent_imagenet
|
| 75 |
+
batch_size: 32
|
| 76 |
+
n_worker: 8
|
| 77 |
+
train_drop_last: true
|
| 78 |
+
seed: 0
|
| 79 |
+
data_dir: assets/data/latent/dc_ae_f32c32_in_1.0/imagenet_512
|
| 80 |
+
resume: true
|
| 81 |
+
resume_path: null
|
| 82 |
+
resume_schedule: true
|
| 83 |
+
num_epochs: null
|
| 84 |
+
max_steps: 3000000
|
| 85 |
+
clip_grad: null
|
| 86 |
+
num_store_images: 64
|
| 87 |
+
save_checkpoint_steps: 1000
|
| 88 |
+
evaluate_steps: 20000
|
| 89 |
+
optimizer:
|
| 90 |
+
name: adamw
|
| 91 |
+
lr: 0.0002
|
| 92 |
+
warmup_lr: 0.0
|
| 93 |
+
weight_decay: 0.0
|
| 94 |
+
no_wd_keys: []
|
| 95 |
+
betas:
|
| 96 |
+
- 0.9
|
| 97 |
+
- 0.999
|
| 98 |
+
lr_scheduler:
|
| 99 |
+
name: constant
|
| 100 |
+
warmup_steps: 1000
|
| 101 |
+
log: true
|
| 102 |
+
wandb_entity: null
|
| 103 |
+
wandb_project: null
|
| 104 |
+
ema_decay: 0.9999
|
| 105 |
+
ema_warmup_steps: 2000
|
| 106 |
+
evaluate_ema: true
|
log.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model.txt
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DiT(
|
| 2 |
+
(x_embedder): PatchEmbed(
|
| 3 |
+
(proj): Conv2d(32, 1152, kernel_size=(1, 1), stride=(1, 1))
|
| 4 |
+
(norm): Identity()
|
| 5 |
+
)
|
| 6 |
+
(t_embedder): TimestepEmbedder(
|
| 7 |
+
(mlp): Sequential(
|
| 8 |
+
(0): Linear(in_features=256, out_features=1152, bias=True)
|
| 9 |
+
(1): SiLU()
|
| 10 |
+
(2): Linear(in_features=1152, out_features=1152, bias=True)
|
| 11 |
+
)
|
| 12 |
+
)
|
| 13 |
+
(y_embedder): LabelEmbedder(
|
| 14 |
+
(embedding_table): Embedding(1001, 1152)
|
| 15 |
+
)
|
| 16 |
+
(blocks): ModuleList(
|
| 17 |
+
(0-27): 28 x DiTBlock(
|
| 18 |
+
(norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
|
| 19 |
+
(attn): Attention(
|
| 20 |
+
(qkv): Linear(in_features=1152, out_features=3456, bias=True)
|
| 21 |
+
(attn_drop): Dropout(p=0.0, inplace=False)
|
| 22 |
+
(proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 23 |
+
(proj_drop): Dropout(p=0.0, inplace=False)
|
| 24 |
+
)
|
| 25 |
+
(norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
|
| 26 |
+
(mlp): Mlp(
|
| 27 |
+
(fc1): Linear(in_features=1152, out_features=4608, bias=True)
|
| 28 |
+
(act): GELU(approximate='tanh')
|
| 29 |
+
(drop1): Dropout(p=0, inplace=False)
|
| 30 |
+
(fc2): Linear(in_features=4608, out_features=1152, bias=True)
|
| 31 |
+
(drop2): Dropout(p=0, inplace=False)
|
| 32 |
+
)
|
| 33 |
+
(adaLN_modulation): Sequential(
|
| 34 |
+
(0): SiLU()
|
| 35 |
+
(1): Linear(in_features=1152, out_features=6912, bias=True)
|
| 36 |
+
)
|
| 37 |
+
)
|
| 38 |
+
)
|
| 39 |
+
(final_layer): FinalLayer(
|
| 40 |
+
(norm_final): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
|
| 41 |
+
(linear): Linear(in_features=1152, out_features=64, bias=True)
|
| 42 |
+
(adaLN_modulation): Sequential(
|
| 43 |
+
(0): SiLU()
|
| 44 |
+
(1): Linear(in_features=1152, out_features=2304, bias=True)
|
| 45 |
+
)
|
| 46 |
+
)
|
| 47 |
+
)
|