End of training
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- checkpoint-20000/optimizer.bin +3 -0
- checkpoint-20000/random_states_0.pkl +3 -0
- checkpoint-20000/scaler.pt +3 -0
- checkpoint-20000/scheduler.bin +3 -0
- checkpoint-20000/unet/config.json +62 -0
- checkpoint-20000/unet/diffusion_pytorch_model.bin +3 -0
- checkpoint-20000/unet_ema/config.json +69 -0
- checkpoint-20000/unet_ema/diffusion_pytorch_model.bin +3 -0
- checkpoint-40000/optimizer.bin +3 -0
- checkpoint-40000/random_states_0.pkl +3 -0
- checkpoint-40000/scaler.pt +3 -0
- checkpoint-40000/scheduler.bin +3 -0
- checkpoint-40000/unet/config.json +62 -0
- checkpoint-40000/unet/diffusion_pytorch_model.bin +3 -0
- checkpoint-40000/unet_ema/config.json +69 -0
- checkpoint-40000/unet_ema/diffusion_pytorch_model.bin +3 -0
- feature_extractor/preprocessor_config.json +28 -0
- logs/text2image-fine-tune/1687484281.3629444/events.out.tfevents.1687484281.g023.sol.rc.asu.edu.1227467.1 +3 -0
- logs/text2image-fine-tune/1687484281.4535692/hparams.yml +50 -0
- logs/text2image-fine-tune/1687484732.4959297/events.out.tfevents.1687484732.g023.sol.rc.asu.edu.1227888.1 +3 -0
- logs/text2image-fine-tune/1687484732.5515726/hparams.yml +50 -0
- logs/text2image-fine-tune/1687485306.054261/events.out.tfevents.1687485306.g023.sol.rc.asu.edu.1228654.1 +3 -0
- logs/text2image-fine-tune/1687485306.1146164/hparams.yml +50 -0
- logs/text2image-fine-tune/1687485686.1828926/events.out.tfevents.1687485686.g023.sol.rc.asu.edu.1229451.1 +3 -0
- logs/text2image-fine-tune/1687485686.2287989/hparams.yml +50 -0
- logs/text2image-fine-tune/1687488854.9296603/events.out.tfevents.1687488854.g012.sol.rc.asu.edu.891332.1 +3 -0
- logs/text2image-fine-tune/1687488854.9867501/hparams.yml +50 -0
- logs/text2image-fine-tune/1687489415.8570626/events.out.tfevents.1687489415.g012.sol.rc.asu.edu.892320.1 +3 -0
- logs/text2image-fine-tune/1687489415.8990405/hparams.yml +50 -0
- logs/text2image-fine-tune/1687489894.8476286/events.out.tfevents.1687489894.g012.sol.rc.asu.edu.892948.1 +3 -0
- logs/text2image-fine-tune/1687489894.9371443/hparams.yml +50 -0
- logs/text2image-fine-tune/1687490219.3755434/events.out.tfevents.1687490219.g012.sol.rc.asu.edu.893130.1 +3 -0
- logs/text2image-fine-tune/1687490219.4269717/hparams.yml +50 -0
- logs/text2image-fine-tune/1687567394.2765768/events.out.tfevents.1687567394.g012.sol.rc.asu.edu.1015146.1 +3 -0
- logs/text2image-fine-tune/1687567394.285055/hparams.yml +50 -0
- logs/text2image-fine-tune/1687567582.553664/events.out.tfevents.1687567582.g017.sol.rc.asu.edu.3986616.1 +3 -0
- logs/text2image-fine-tune/1687567582.5628018/hparams.yml +50 -0
- logs/text2image-fine-tune/1688712104.851267/events.out.tfevents.1688712104.g010.sol.rc.asu.edu.2546128.1 +3 -0
- logs/text2image-fine-tune/1688712105.184274/hparams.yml +50 -0
- logs/text2image-fine-tune/1688785360.9873862/events.out.tfevents.1688785360.g010.sol.rc.asu.edu.2589748.1 +3 -0
- logs/text2image-fine-tune/1688785361.2153516/hparams.yml +50 -0
- logs/text2image-fine-tune/events.out.tfevents.1687484281.g023.sol.rc.asu.edu.1227467.0 +3 -0
- logs/text2image-fine-tune/events.out.tfevents.1687484730.g023.sol.rc.asu.edu.1227888.0 +3 -0
- logs/text2image-fine-tune/events.out.tfevents.1687485305.g023.sol.rc.asu.edu.1228654.0 +3 -0
- logs/text2image-fine-tune/events.out.tfevents.1687485686.g023.sol.rc.asu.edu.1229451.0 +3 -0
- logs/text2image-fine-tune/events.out.tfevents.1687488854.g012.sol.rc.asu.edu.891332.0 +3 -0
- logs/text2image-fine-tune/events.out.tfevents.1687489415.g012.sol.rc.asu.edu.892320.0 +3 -0
- logs/text2image-fine-tune/events.out.tfevents.1687489894.g012.sol.rc.asu.edu.892948.0 +3 -0
- logs/text2image-fine-tune/events.out.tfevents.1687490219.g012.sol.rc.asu.edu.893130.0 +3 -0
- logs/text2image-fine-tune/events.out.tfevents.1687567394.g012.sol.rc.asu.edu.1015146.0 +3 -0
checkpoint-20000/optimizer.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37d9c13d1711eaff1572fa71ff039a7ad5766c9f821873480100c7e392a36dd9
|
| 3 |
+
size 6876749651
|
checkpoint-20000/random_states_0.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:632b52d6f4793e5dbc7409c5e420a9019450ea432dfe71ed2d30f7cc0df43a91
|
| 3 |
+
size 14663
|
checkpoint-20000/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be5e771fc1f584f9f2886467944e52c2a921d5979873e4963bea0ca857322f26
|
| 3 |
+
size 557
|
checkpoint-20000/scheduler.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9564e8ff9fb42a8970a60b5119ae43b907efd058c800b04b2867d62e8aad6bc
|
| 3 |
+
size 563
|
checkpoint-20000/unet/config.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "UNet2DConditionModel",
|
| 3 |
+
"_diffusers_version": "0.18.0.dev0",
|
| 4 |
+
"_name_or_path": "spatial-relationship/spatial-relationship-model-60-2",
|
| 5 |
+
"act_fn": "silu",
|
| 6 |
+
"addition_embed_type": null,
|
| 7 |
+
"addition_embed_type_num_heads": 64,
|
| 8 |
+
"attention_head_dim": 8,
|
| 9 |
+
"block_out_channels": [
|
| 10 |
+
320,
|
| 11 |
+
640,
|
| 12 |
+
1280,
|
| 13 |
+
1280
|
| 14 |
+
],
|
| 15 |
+
"center_input_sample": false,
|
| 16 |
+
"class_embed_type": null,
|
| 17 |
+
"class_embeddings_concat": false,
|
| 18 |
+
"conv_in_kernel": 3,
|
| 19 |
+
"conv_out_kernel": 3,
|
| 20 |
+
"cross_attention_dim": 768,
|
| 21 |
+
"cross_attention_norm": null,
|
| 22 |
+
"down_block_types": [
|
| 23 |
+
"CrossAttnDownBlock2D",
|
| 24 |
+
"CrossAttnDownBlock2D",
|
| 25 |
+
"CrossAttnDownBlock2D",
|
| 26 |
+
"DownBlock2D"
|
| 27 |
+
],
|
| 28 |
+
"downsample_padding": 1,
|
| 29 |
+
"dual_cross_attention": false,
|
| 30 |
+
"encoder_hid_dim": null,
|
| 31 |
+
"encoder_hid_dim_type": null,
|
| 32 |
+
"flip_sin_to_cos": true,
|
| 33 |
+
"freq_shift": 0,
|
| 34 |
+
"in_channels": 4,
|
| 35 |
+
"layers_per_block": 2,
|
| 36 |
+
"mid_block_only_cross_attention": null,
|
| 37 |
+
"mid_block_scale_factor": 1,
|
| 38 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
| 39 |
+
"norm_eps": 1e-05,
|
| 40 |
+
"norm_num_groups": 32,
|
| 41 |
+
"num_class_embeds": null,
|
| 42 |
+
"only_cross_attention": false,
|
| 43 |
+
"out_channels": 4,
|
| 44 |
+
"projection_class_embeddings_input_dim": null,
|
| 45 |
+
"resnet_out_scale_factor": 1.0,
|
| 46 |
+
"resnet_skip_time_act": false,
|
| 47 |
+
"resnet_time_scale_shift": "default",
|
| 48 |
+
"sample_size": 64,
|
| 49 |
+
"time_cond_proj_dim": null,
|
| 50 |
+
"time_embedding_act_fn": null,
|
| 51 |
+
"time_embedding_dim": null,
|
| 52 |
+
"time_embedding_type": "positional",
|
| 53 |
+
"timestep_post_act": null,
|
| 54 |
+
"up_block_types": [
|
| 55 |
+
"UpBlock2D",
|
| 56 |
+
"CrossAttnUpBlock2D",
|
| 57 |
+
"CrossAttnUpBlock2D",
|
| 58 |
+
"CrossAttnUpBlock2D"
|
| 59 |
+
],
|
| 60 |
+
"upcast_attention": false,
|
| 61 |
+
"use_linear_projection": false
|
| 62 |
+
}
|
checkpoint-20000/unet/diffusion_pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc5c366f4cee063d2a62f45521f47773cc446b432044d00872d4e70823430922
|
| 3 |
+
size 3438375973
|
checkpoint-20000/unet_ema/config.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "UNet2DConditionModel",
|
| 3 |
+
"_diffusers_version": "0.18.0.dev0",
|
| 4 |
+
"_name_or_path": "spatial-relationship/spatial-relationship-model-60-2",
|
| 5 |
+
"act_fn": "silu",
|
| 6 |
+
"addition_embed_type": null,
|
| 7 |
+
"addition_embed_type_num_heads": 64,
|
| 8 |
+
"attention_head_dim": 8,
|
| 9 |
+
"block_out_channels": [
|
| 10 |
+
320,
|
| 11 |
+
640,
|
| 12 |
+
1280,
|
| 13 |
+
1280
|
| 14 |
+
],
|
| 15 |
+
"center_input_sample": false,
|
| 16 |
+
"class_embed_type": null,
|
| 17 |
+
"class_embeddings_concat": false,
|
| 18 |
+
"conv_in_kernel": 3,
|
| 19 |
+
"conv_out_kernel": 3,
|
| 20 |
+
"cross_attention_dim": 768,
|
| 21 |
+
"cross_attention_norm": null,
|
| 22 |
+
"decay": 0.9999,
|
| 23 |
+
"down_block_types": [
|
| 24 |
+
"CrossAttnDownBlock2D",
|
| 25 |
+
"CrossAttnDownBlock2D",
|
| 26 |
+
"CrossAttnDownBlock2D",
|
| 27 |
+
"DownBlock2D"
|
| 28 |
+
],
|
| 29 |
+
"downsample_padding": 1,
|
| 30 |
+
"dual_cross_attention": false,
|
| 31 |
+
"encoder_hid_dim": null,
|
| 32 |
+
"encoder_hid_dim_type": null,
|
| 33 |
+
"flip_sin_to_cos": true,
|
| 34 |
+
"freq_shift": 0,
|
| 35 |
+
"in_channels": 4,
|
| 36 |
+
"inv_gamma": 1.0,
|
| 37 |
+
"layers_per_block": 2,
|
| 38 |
+
"mid_block_only_cross_attention": null,
|
| 39 |
+
"mid_block_scale_factor": 1,
|
| 40 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
| 41 |
+
"min_decay": 0.0,
|
| 42 |
+
"norm_eps": 1e-05,
|
| 43 |
+
"norm_num_groups": 32,
|
| 44 |
+
"num_class_embeds": null,
|
| 45 |
+
"only_cross_attention": false,
|
| 46 |
+
"optimization_step": 20000,
|
| 47 |
+
"out_channels": 4,
|
| 48 |
+
"power": 0.6666666666666666,
|
| 49 |
+
"projection_class_embeddings_input_dim": null,
|
| 50 |
+
"resnet_out_scale_factor": 1.0,
|
| 51 |
+
"resnet_skip_time_act": false,
|
| 52 |
+
"resnet_time_scale_shift": "default",
|
| 53 |
+
"sample_size": 64,
|
| 54 |
+
"time_cond_proj_dim": null,
|
| 55 |
+
"time_embedding_act_fn": null,
|
| 56 |
+
"time_embedding_dim": null,
|
| 57 |
+
"time_embedding_type": "positional",
|
| 58 |
+
"timestep_post_act": null,
|
| 59 |
+
"up_block_types": [
|
| 60 |
+
"UpBlock2D",
|
| 61 |
+
"CrossAttnUpBlock2D",
|
| 62 |
+
"CrossAttnUpBlock2D",
|
| 63 |
+
"CrossAttnUpBlock2D"
|
| 64 |
+
],
|
| 65 |
+
"upcast_attention": false,
|
| 66 |
+
"update_after_step": 0,
|
| 67 |
+
"use_ema_warmup": false,
|
| 68 |
+
"use_linear_projection": false
|
| 69 |
+
}
|
checkpoint-20000/unet_ema/diffusion_pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa69a706ab85edff167f10c02f1d963c0a4f9beefdb28960ca1bec948e11e194
|
| 3 |
+
size 3438366373
|
checkpoint-40000/optimizer.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1aa43a1ba9ba25ea21a774b1338012f63ef5f33193fe4afb62cd1431fec4095a
|
| 3 |
+
size 6876749651
|
checkpoint-40000/random_states_0.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54a2390d8a192c7587b1fa0fcbd6e77f9ccbd3f3b4ecbddceab87a8aecb90f5c
|
| 3 |
+
size 14663
|
checkpoint-40000/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94408cfb561631f4532a459d4661dd7fa62b8e58efd9fb1367ba9e9eda944629
|
| 3 |
+
size 557
|
checkpoint-40000/scheduler.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0651858ab464f74a92e167f623ffd8a7c3ac0508a248cccf2bdcb105a430e75
|
| 3 |
+
size 563
|
checkpoint-40000/unet/config.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "UNet2DConditionModel",
|
| 3 |
+
"_diffusers_version": "0.18.0.dev0",
|
| 4 |
+
"_name_or_path": "spatial-relationship/spatial-relationship-model-60-2",
|
| 5 |
+
"act_fn": "silu",
|
| 6 |
+
"addition_embed_type": null,
|
| 7 |
+
"addition_embed_type_num_heads": 64,
|
| 8 |
+
"attention_head_dim": 8,
|
| 9 |
+
"block_out_channels": [
|
| 10 |
+
320,
|
| 11 |
+
640,
|
| 12 |
+
1280,
|
| 13 |
+
1280
|
| 14 |
+
],
|
| 15 |
+
"center_input_sample": false,
|
| 16 |
+
"class_embed_type": null,
|
| 17 |
+
"class_embeddings_concat": false,
|
| 18 |
+
"conv_in_kernel": 3,
|
| 19 |
+
"conv_out_kernel": 3,
|
| 20 |
+
"cross_attention_dim": 768,
|
| 21 |
+
"cross_attention_norm": null,
|
| 22 |
+
"down_block_types": [
|
| 23 |
+
"CrossAttnDownBlock2D",
|
| 24 |
+
"CrossAttnDownBlock2D",
|
| 25 |
+
"CrossAttnDownBlock2D",
|
| 26 |
+
"DownBlock2D"
|
| 27 |
+
],
|
| 28 |
+
"downsample_padding": 1,
|
| 29 |
+
"dual_cross_attention": false,
|
| 30 |
+
"encoder_hid_dim": null,
|
| 31 |
+
"encoder_hid_dim_type": null,
|
| 32 |
+
"flip_sin_to_cos": true,
|
| 33 |
+
"freq_shift": 0,
|
| 34 |
+
"in_channels": 4,
|
| 35 |
+
"layers_per_block": 2,
|
| 36 |
+
"mid_block_only_cross_attention": null,
|
| 37 |
+
"mid_block_scale_factor": 1,
|
| 38 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
| 39 |
+
"norm_eps": 1e-05,
|
| 40 |
+
"norm_num_groups": 32,
|
| 41 |
+
"num_class_embeds": null,
|
| 42 |
+
"only_cross_attention": false,
|
| 43 |
+
"out_channels": 4,
|
| 44 |
+
"projection_class_embeddings_input_dim": null,
|
| 45 |
+
"resnet_out_scale_factor": 1.0,
|
| 46 |
+
"resnet_skip_time_act": false,
|
| 47 |
+
"resnet_time_scale_shift": "default",
|
| 48 |
+
"sample_size": 64,
|
| 49 |
+
"time_cond_proj_dim": null,
|
| 50 |
+
"time_embedding_act_fn": null,
|
| 51 |
+
"time_embedding_dim": null,
|
| 52 |
+
"time_embedding_type": "positional",
|
| 53 |
+
"timestep_post_act": null,
|
| 54 |
+
"up_block_types": [
|
| 55 |
+
"UpBlock2D",
|
| 56 |
+
"CrossAttnUpBlock2D",
|
| 57 |
+
"CrossAttnUpBlock2D",
|
| 58 |
+
"CrossAttnUpBlock2D"
|
| 59 |
+
],
|
| 60 |
+
"upcast_attention": false,
|
| 61 |
+
"use_linear_projection": false
|
| 62 |
+
}
|
checkpoint-40000/unet/diffusion_pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94d801d7c1ecd0de99515fdee52696ad658764cee182397d8a4c26e072d45659
|
| 3 |
+
size 3438375973
|
checkpoint-40000/unet_ema/config.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "UNet2DConditionModel",
|
| 3 |
+
"_diffusers_version": "0.18.0.dev0",
|
| 4 |
+
"_name_or_path": "spatial-relationship/spatial-relationship-model-60-2",
|
| 5 |
+
"act_fn": "silu",
|
| 6 |
+
"addition_embed_type": null,
|
| 7 |
+
"addition_embed_type_num_heads": 64,
|
| 8 |
+
"attention_head_dim": 8,
|
| 9 |
+
"block_out_channels": [
|
| 10 |
+
320,
|
| 11 |
+
640,
|
| 12 |
+
1280,
|
| 13 |
+
1280
|
| 14 |
+
],
|
| 15 |
+
"center_input_sample": false,
|
| 16 |
+
"class_embed_type": null,
|
| 17 |
+
"class_embeddings_concat": false,
|
| 18 |
+
"conv_in_kernel": 3,
|
| 19 |
+
"conv_out_kernel": 3,
|
| 20 |
+
"cross_attention_dim": 768,
|
| 21 |
+
"cross_attention_norm": null,
|
| 22 |
+
"decay": 0.9999,
|
| 23 |
+
"down_block_types": [
|
| 24 |
+
"CrossAttnDownBlock2D",
|
| 25 |
+
"CrossAttnDownBlock2D",
|
| 26 |
+
"CrossAttnDownBlock2D",
|
| 27 |
+
"DownBlock2D"
|
| 28 |
+
],
|
| 29 |
+
"downsample_padding": 1,
|
| 30 |
+
"dual_cross_attention": false,
|
| 31 |
+
"encoder_hid_dim": null,
|
| 32 |
+
"encoder_hid_dim_type": null,
|
| 33 |
+
"flip_sin_to_cos": true,
|
| 34 |
+
"freq_shift": 0,
|
| 35 |
+
"in_channels": 4,
|
| 36 |
+
"inv_gamma": 1.0,
|
| 37 |
+
"layers_per_block": 2,
|
| 38 |
+
"mid_block_only_cross_attention": null,
|
| 39 |
+
"mid_block_scale_factor": 1,
|
| 40 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
| 41 |
+
"min_decay": 0.0,
|
| 42 |
+
"norm_eps": 1e-05,
|
| 43 |
+
"norm_num_groups": 32,
|
| 44 |
+
"num_class_embeds": null,
|
| 45 |
+
"only_cross_attention": false,
|
| 46 |
+
"optimization_step": 40000,
|
| 47 |
+
"out_channels": 4,
|
| 48 |
+
"power": 0.6666666666666666,
|
| 49 |
+
"projection_class_embeddings_input_dim": null,
|
| 50 |
+
"resnet_out_scale_factor": 1.0,
|
| 51 |
+
"resnet_skip_time_act": false,
|
| 52 |
+
"resnet_time_scale_shift": "default",
|
| 53 |
+
"sample_size": 64,
|
| 54 |
+
"time_cond_proj_dim": null,
|
| 55 |
+
"time_embedding_act_fn": null,
|
| 56 |
+
"time_embedding_dim": null,
|
| 57 |
+
"time_embedding_type": "positional",
|
| 58 |
+
"timestep_post_act": null,
|
| 59 |
+
"up_block_types": [
|
| 60 |
+
"UpBlock2D",
|
| 61 |
+
"CrossAttnUpBlock2D",
|
| 62 |
+
"CrossAttnUpBlock2D",
|
| 63 |
+
"CrossAttnUpBlock2D"
|
| 64 |
+
],
|
| 65 |
+
"upcast_attention": false,
|
| 66 |
+
"update_after_step": 0,
|
| 67 |
+
"use_ema_warmup": false,
|
| 68 |
+
"use_linear_projection": false
|
| 69 |
+
}
|
checkpoint-40000/unet_ema/diffusion_pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4806a647ec32335cae9ee5c3539944986d5f02047b0c691651761dcc4e7343aa
|
| 3 |
+
size 3438366373
|
feature_extractor/preprocessor_config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"crop_size": {
|
| 3 |
+
"height": 224,
|
| 4 |
+
"width": 224
|
| 5 |
+
},
|
| 6 |
+
"do_center_crop": true,
|
| 7 |
+
"do_convert_rgb": true,
|
| 8 |
+
"do_normalize": true,
|
| 9 |
+
"do_rescale": true,
|
| 10 |
+
"do_resize": true,
|
| 11 |
+
"feature_extractor_type": "CLIPFeatureExtractor",
|
| 12 |
+
"image_mean": [
|
| 13 |
+
0.48145466,
|
| 14 |
+
0.4578275,
|
| 15 |
+
0.40821073
|
| 16 |
+
],
|
| 17 |
+
"image_processor_type": "CLIPImageProcessor",
|
| 18 |
+
"image_std": [
|
| 19 |
+
0.26862954,
|
| 20 |
+
0.26130258,
|
| 21 |
+
0.27577711
|
| 22 |
+
],
|
| 23 |
+
"resample": 3,
|
| 24 |
+
"rescale_factor": 0.00392156862745098,
|
| 25 |
+
"size": {
|
| 26 |
+
"shortest_edge": 224
|
| 27 |
+
}
|
| 28 |
+
}
|
logs/text2image-fine-tune/1687484281.3629444/events.out.tfevents.1687484281.g023.sol.rc.asu.edu.1227467.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5b247478c0da6240ad86abc16b6a7975a5c709eaed68039daf05c06c86602de
|
| 3 |
+
size 2369
|
logs/text2image-fine-tune/1687484281.4535692/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: image
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 20220
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 20
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: wuaszdc/spatial-relationship-model-quarter
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: /scratch/dzhan120/download
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1687484732.4959297/events.out.tfevents.1687484732.g023.sol.rc.asu.edu.1227888.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f431f7dc56ca70302273798a759240d19201d179c470649773e34c4a89da7a90
|
| 3 |
+
size 2382
|
logs/text2image-fine-tune/1687484732.5515726/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 20220
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 20
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: wuaszdc/spatial-relationship-model-quarter
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: /scratch/dzhan120/download
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1687485306.054261/events.out.tfevents.1687485306.g023.sol.rc.asu.edu.1228654.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b84c4e342c5344d9050d245b8e50137aa8b3111cd9bcf04127ac9ce4677e5a46
|
| 3 |
+
size 2388
|
logs/text2image-fine-tune/1687485306.1146164/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 20220
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 20
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: wuaszdc/spatial-relationship-model-quarter
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: /scratch/dzhan120/download/train
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1687485686.1828926/events.out.tfevents.1687485686.g023.sol.rc.asu.edu.1229451.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab13b2956b2da6daf23d4c8187d1dd18104dab5c1210cf4996edf342810ac011
|
| 3 |
+
size 2370
|
logs/text2image-fine-tune/1687485686.2287989/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 3260
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 20
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: wuaszdc/spatial-relationship-model-quarter
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: download/train
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1687488854.9296603/events.out.tfevents.1687488854.g012.sol.rc.asu.edu.891332.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:506c7308c195803b7b4fdbd95df35d91dd138af08383c71acf6e0661afb2f424
|
| 3 |
+
size 2370
|
logs/text2image-fine-tune/1687488854.9867501/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 20220
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 20
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: wuaszdc/spatial-relationship-model-quarter
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: download/train
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1687489415.8570626/events.out.tfevents.1687489415.g012.sol.rc.asu.edu.892320.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37e4b202acc14e752ab8350b68638eb8cb6faca385c664675f53978d91cef045
|
| 3 |
+
size 2370
|
logs/text2image-fine-tune/1687489415.8990405/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 20220
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 20
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: wuaszdc/spatial-relationship-model-quarter
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: download/train
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1687489894.8476286/events.out.tfevents.1687489894.g012.sol.rc.asu.edu.892948.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3642d5b9e0ae6c79ab291513be74f33a6d89f4e7e7ec381b642091d031f3ff50
|
| 3 |
+
size 2370
|
logs/text2image-fine-tune/1687489894.9371443/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 20220
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 20
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: wuaszdc/spatial-relationship-model-quarter
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: download/train
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1687490219.3755434/events.out.tfevents.1687490219.g012.sol.rc.asu.edu.893130.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b78aa484bf9ee37049d656d7d8d02c0af9ef255ee40472d30862feb3374a54cc
|
| 3 |
+
size 2370
|
logs/text2image-fine-tune/1687490219.4269717/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 20220
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 20
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: wuaszdc/spatial-relationship-model-quarter
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: download/train
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1687567394.2765768/events.out.tfevents.1687567394.g012.sol.rc.asu.edu.1015146.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f641aafd9ce7f8960e0968e731d2fbabdb0e7cb3f0adb1fc6533eb098b9d7c4
|
| 3 |
+
size 2384
|
logs/text2image-fine-tune/1687567394.285055/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 20060
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 20
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: wuaszdc/spatial-relationship-model-quarter
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: /scratch/dzhan120/download2/
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1687567582.553664/events.out.tfevents.1687567582.g017.sol.rc.asu.edu.3986616.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65edb5fbead8729df308885a5c4505a6087c07271105281506fc21242177c156
|
| 3 |
+
size 2384
|
logs/text2image-fine-tune/1687567582.5628018/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 20060
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 20
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: wuaszdc/spatial-relationship-model-quarter
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: /scratch/dzhan120/download2/
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1688712104.851267/events.out.tfevents.1688712104.g010.sol.rc.asu.edu.2546128.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f34de2865a7fb14c80529c0ee42c6280c67e3b89f55e9c6ae067316f447bf982
|
| 3 |
+
size 2394
|
logs/text2image-fine-tune/1688712105.184274/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 50300
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 50
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: spatial-relationship/spatial-relationship-model-60-2
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: /scratch/dzhan120/download3/
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/1688785360.9873862/events.out.tfevents.1688785360.g010.sol.rc.asu.edu.2589748.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07cc82fa3e6e8a5332d81488f66ffe004d23517a9b82a39d77a227a0b911b7b0
|
| 3 |
+
size 2394
|
logs/text2image-fine-tune/1688785361.2153516/hparams.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
adam_epsilon: 1.0e-08
|
| 4 |
+
adam_weight_decay: 0.01
|
| 5 |
+
allow_tf32: false
|
| 6 |
+
cache_dir: null
|
| 7 |
+
caption_column: additional_feature
|
| 8 |
+
center_crop: false
|
| 9 |
+
checkpointing_steps: 20000
|
| 10 |
+
checkpoints_total_limit: null
|
| 11 |
+
dataloader_num_workers: 0
|
| 12 |
+
dataset_config_name: null
|
| 13 |
+
dataset_name: null
|
| 14 |
+
enable_xformers_memory_efficient_attention: false
|
| 15 |
+
gradient_accumulation_steps: 4
|
| 16 |
+
gradient_checkpointing: false
|
| 17 |
+
hub_model_id: spatial-relationship-model-back
|
| 18 |
+
hub_token: null
|
| 19 |
+
image_column: image
|
| 20 |
+
input_perturbation: 0
|
| 21 |
+
learning_rate: 1.0e-05
|
| 22 |
+
local_rank: -1
|
| 23 |
+
logging_dir: logs
|
| 24 |
+
lr_scheduler: cosine_with_restarts
|
| 25 |
+
lr_warmup_steps: 0
|
| 26 |
+
max_grad_norm: 1.0
|
| 27 |
+
max_train_samples: null
|
| 28 |
+
max_train_steps: 49100
|
| 29 |
+
mixed_precision: null
|
| 30 |
+
noise_offset: 0
|
| 31 |
+
non_ema_revision: null
|
| 32 |
+
num_train_epochs: 50
|
| 33 |
+
output_dir: /scratch/dzhan120/spatial-relationship-model-back
|
| 34 |
+
prediction_type: null
|
| 35 |
+
pretrained_model_name_or_path: spatial-relationship/spatial-relationship-model-60-2
|
| 36 |
+
push_to_hub: true
|
| 37 |
+
random_flip: false
|
| 38 |
+
report_to: tensorboard
|
| 39 |
+
resolution: 512
|
| 40 |
+
resume_from_checkpoint: null
|
| 41 |
+
revision: null
|
| 42 |
+
scale_lr: false
|
| 43 |
+
seed: 42
|
| 44 |
+
snr_gamma: null
|
| 45 |
+
tracker_project_name: text2image-fine-tune
|
| 46 |
+
train_batch_size: 2
|
| 47 |
+
train_data_dir: /scratch/dzhan120/download3/
|
| 48 |
+
use_8bit_adam: false
|
| 49 |
+
use_ema: true
|
| 50 |
+
validation_epochs: 5
|
logs/text2image-fine-tune/events.out.tfevents.1687484281.g023.sol.rc.asu.edu.1227467.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:138298a66171be310916e948b6152397a91cadc28658dadedce471335a0054df
|
| 3 |
+
size 88
|
logs/text2image-fine-tune/events.out.tfevents.1687484730.g023.sol.rc.asu.edu.1227888.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c1ce35b15df38d61cfd1ba4750767aca9cde40471f5f5567b296f9f8f424ac7
|
| 3 |
+
size 88
|
logs/text2image-fine-tune/events.out.tfevents.1687485305.g023.sol.rc.asu.edu.1228654.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff35cc848499a5d7f42a65ddf99491dbd3851edb43192678c120866b4ddc52a
|
| 3 |
+
size 88
|
logs/text2image-fine-tune/events.out.tfevents.1687485686.g023.sol.rc.asu.edu.1229451.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b289a0f1a35d40b5e10c085264f002a58ed580dba4c588c35c867bff1864ed42
|
| 3 |
+
size 88
|
logs/text2image-fine-tune/events.out.tfevents.1687488854.g012.sol.rc.asu.edu.891332.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b85d7a04801092388945d66bdd1f6c0f886dc4d629afe1dd1e752a9e6c54fc3
|
| 3 |
+
size 88
|
logs/text2image-fine-tune/events.out.tfevents.1687489415.g012.sol.rc.asu.edu.892320.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05443396f7518fdc6413a397d894b622df25a4c5b473db33130fdcc605657649
|
| 3 |
+
size 88
|
logs/text2image-fine-tune/events.out.tfevents.1687489894.g012.sol.rc.asu.edu.892948.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8df7aed7db0f19f0a9fd3d0cc28308ec73fabb11a1ba645b2ec9d9c7cd877798
|
| 3 |
+
size 1960
|
logs/text2image-fine-tune/events.out.tfevents.1687490219.g012.sol.rc.asu.edu.893130.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5ad9c24a76c5c33c7d9ebdd157ff435df6ddac9b5c57cbcb9a281359c29f2e8
|
| 3 |
+
size 1096
|
logs/text2image-fine-tune/events.out.tfevents.1687567394.g012.sol.rc.asu.edu.1015146.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa77d45cc5c1785bc9f02b7fca85e4647975b188e920c93ab3e90c46baac74c0
|
| 3 |
+
size 8389
|