zheedong committed on
Commit
7d88e0e
·
1 Parent(s): f1c1633

SlotTok init

Browse files
Files changed (2) hide show
  1. config.yaml +142 -0
  2. slottok.ckpt +3 -0
config.yaml ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cfg_path": "configs/training/stage1/stage1_decoder/slot_qformer_stage1_decoder_slot_adapter_rvq.yaml",
3
+ "tokenizer_cfg_path": "configs/tokenizer/seed_llama_tokenizer_hf.yaml",
4
+ "transform_cfg_path": "configs/transform/slot_transform.yaml",
5
+ "model_cfg_path": "configs/llm/seed_llama_8b.yaml",
6
+ "result_file_path": "./logs/slot_diffusion",
7
+ "checkpoint_path": {
8
+ "model_path": "pretrained/blip2_pretrained.pth",
9
+ "diffusion_model_path": "stabilityai/stable-diffusion-2-1-unclip"
10
+ },
11
+ "test": false,
12
+ "resume": false,
13
+ "load_weight": true,
14
+ "weight_path": "logs/slot_stage1_related/1202_slot_qformer_stage1_unclip_itc05_iter1morph_align0_v2_test_train_unfreeze_align01/checkpoints/last.ckpt",
15
+ "eval": false,
16
+ "dist": {
17
+ "n_gpus": 8,
18
+ "n_nodes": 1
19
+ },
20
+ "dataset": {
21
+ "train_config": {
22
+ "dataset_configs": [
23
+ "configs/data/cc15m-total.yaml",
24
+ "configs/data/laion-coco.yaml",
25
+ "configs/data/mscoco.yaml",
26
+ "configs/data/unsplash.yaml"
27
+ ],
28
+ "weights": [
29
+ 15,
30
+ 24,
31
+ 1,
32
+ 4
33
+ ],
34
+ "shardshuffle": 100,
35
+ "resampled": true,
36
+ "world_size": 1,
37
+ "one_epoch_data_size": 1000000
38
+ },
39
+ "val_config": {
40
+ "karpathy_file_path": "coco/annotations/karpathy/dataset_coco_test.json",
41
+ "root_dir": "coco/images/val2014",
42
+ "start_index": 0,
43
+ "end_index": 256,
44
+ "use_coco_val": true
45
+ },
46
+ "num_workers": 16,
47
+ "shuffle": true,
48
+ "text_max_length": 32
49
+ },
50
+ "stage1": {
51
+ "init": "SEED",
52
+ "dino_model_name": "dinov2_vitl14",
53
+ "unfreeze_unet": true,
54
+ "unfreeze_unet_crossattn": false,
55
+ "unfreeze_resnet": false,
56
+ "image_size": 256,
57
+ "loss_weight": {
58
+ "loss_itc": 0.5,
59
+ "loss_lm": 0.5,
60
+ "loss_diffusion": 1,
61
+ "loss_mse": 0.5,
62
+ "loss_slot_align": 0.1
63
+ },
64
+ "use_causal": true,
65
+ "use_blip_itc": false,
66
+ "freeze_qformer_self_attn": false,
67
+ "layer_norm": true,
68
+ "visual_embedding_encoder_as_mlp": true,
69
+ "use_proj": true,
70
+ "use_slot": true,
71
+ "slot_config": {
72
+ "T": 1,
73
+ "num_iterations": 1,
74
+ "use_half_slot": false,
75
+ "type": "morph"
76
+ },
77
+ "val_reverse": true,
78
+ "permute_aug": null,
79
+ "permute_aug_type": null,
80
+ "use_pos_unet": null,
81
+ "use_unet_slotadapt": true
82
+ },
83
+ "stage2": {
84
+ "bypass_codebook": false,
85
+ "load_diffusion": true,
86
+ "vq": {
87
+ "type": "vq2",
88
+ "replace_codes": true,
89
+ "replacement_num_batches": 1000,
90
+ "discarding_threshold": 0.01,
91
+ "legacy": false,
92
+ "vq_type": "residual_vq",
93
+ "num_quantizers": 4,
94
+ "codebook_embed_dim": 32,
95
+ "n_embed": 8192
96
+ },
97
+ "loss_weight": {
98
+ "loss_codebook": 1,
99
+ "loss_recon": 1,
100
+ "loss_generation_embedding": 0.5,
101
+ "loss_diffusion": 0.1,
102
+ "loss_mse": 0.1,
103
+ "loss_slot_align": 0.1
104
+ },
105
+ "recon_s": true,
106
+ "unfreeze_unet": false,
107
+ "unfreeze_linear": false,
108
+ "blocks_layers": 4,
109
+ "blocks_image_layers": 4,
110
+ "use_blocks_image": true,
111
+ "unclip": false
112
+ },
113
+ "experiment": {
114
+ "seed": 42,
115
+ "stage": 2,
116
+ "local_batch_size": 128,
117
+ "val_batch_size": 8,
118
+ "test_split": "train",
119
+ "max_epochs": 50,
120
+ "deterministic": false,
121
+ "grad_accumulation": 1,
122
+ "check_val_every_n_epoch": 1,
123
+ "enable_checkpointing": true,
124
+ "log_every_n_steps": 1,
125
+ "num_sanity_val_steps": 1,
126
+ "num_warmup_steps": 500,
127
+ "grad_clip_val": 2,
128
+ "find_unused_parameters": true,
129
+ "total_training_steps": 48828.125
130
+ },
131
+ "optimizer": {
132
+ "vit_precision": "fp32",
133
+ "diffusion_precision": "fp16",
134
+ "precision": "bf16",
135
+ "max_lr": 0.0001
136
+ },
137
+ "hyperparameters": {
138
+ "beta_1": 0.9,
139
+ "beta_2": 0.999,
140
+ "weight_decay": 0.01
141
+ }
142
+ }
slottok.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3b92ebfe4d13bf2474dec5cd82b855224cbebd1255880d9c3bd35f5a7359889
3
+ size 9774512137