zjuJish commited on
Commit
d5269b2
·
verified ·
1 Parent(s): 36de38f

Upload alpha_work/configs/test_sd_1_vid_1_f_v2_new_bg_scale_v9_sm_v2_stage2_loadself_scale_test.yaml with huggingface_hub

Browse files
alpha_work/configs/test_sd_1_vid_1_f_v2_new_bg_scale_v9_sm_v2_stage2_loadself_scale_test.yaml ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exp_root_dir: exp
2
+ exp_name: layer_diffusion_sd_ft_3_vid_v4.2_new_bg_scale_v9_sm_v2_stage2_new_loadself_scale_test
3
+ mode: test # test
4
+ image_finetune: False
5
+
6
+ # pretrained_model_path: /home/nfs/wyy/models/sd-image-variations-diffusers
7
+ # pretrained_model_path: /mnt/workspace/workgroup/sihui.jsh/DisCo10.9/diffusers/RealisticVision-v2-1
8
+ pretrained_model_path: diffusers_/stable-diffusion-v1-5
9
+ # pretrained_unet_path: exp/ckpts/layer_diffusion_sd_ft_2/epoch_200.pth
10
+ # pretrained_trans_lora: exp/ckpts/layer_diffusion_sd_ft_3_vid_img_v2.1/epoch_73.pth
11
+ pretrained_trans_lora: exp/ckpts/layer_diffusion_sd_ft_3_vid_img_v2.2.1/epoch_40.pth
12
+ # pretrained_trans_lora: ""
13
+ # pretrained_vae_path: ../alpha_work/diffusers/LayerDiffusion
14
+ pretrained_vae_path: exp/ckpts/layer_diffusion_2/epoch_6.pth
15
+ data_root_dir: ../
16
+ seed: 22
17
+
18
+ load_state: True
19
+ load_state_exp_name: layer_diffusion_sd_ft_3_vid_v4.2_new_bg_scale_v9_sm_v2_stage2_new_loadself_scale
20
+ load_state_epoch: 18
21
+
22
+ learning_rate: 0.00002
23
+ decay: 0.001
24
+
25
+ use_text_embedding: True
26
+ text_encoder: clip
27
+
28
+ save_state_epoch: 1
29
+ num_epochs: 200
30
+ batch_size: 1 #
31
+ eval_batch_size: 1 #8
32
+ eval_step: 3000
33
+ eval_scheduler: ddim
34
+ eval_image_num: 50
35
+
36
+ image_encoder: dinov2
37
+ refer_sdvae: True
38
+ unet_trainable_module: up_spatial_lora # transformer, cross_attn, self_cross_attn, all
39
+ ref_encoder_type: dinov2 # clip_all, clip_global, dinov2, clip_multi
40
+ use_clip_proj: False
41
+ cfg_zero_image_first: False
42
+
43
+ num_inf_images_per_prompt: 1
44
+ num_inference_steps: 25
45
+ scale_factor: 0.18215
46
+ guidance_scale: 8.5
47
+ cfg_eta: 0.1
48
+ controlnet_conditioning_scale: 1.0
49
+
50
+ device: cuda
51
+ mixed_precision: 'fp16' # 'no', 'fp16', 'bf16' or 'fp8'
52
+ enable_xformers_memory_efficient_attention: True
53
+ gradient_checkpointing: True
54
+ gradient_accumulate_steps: 2
55
+ num_workers: 8
56
+
57
+ unet_additional_kwargs:
58
+ use_inflated_groupnorm: true
59
+ use_motion_module: true
60
+ motion_module_resolutions: [1,2,4,8]
61
+ motion_module_mid_block: false
62
+ motion_module_type: Vanilla
63
+
64
+ motion_module_kwargs:
65
+ num_attention_heads: 8
66
+ num_transformer_block: 1
67
+ attention_block_types: [ "Temporal_Self", "Temporal_Self" ]
68
+ temporal_position_encoding: true
69
+ temporal_position_encoding_max_len: 32
70
+ temporal_attention_dim_div: 1
71
+ zero_initialize: true
72
+
73
+ noise_scheduler_kwargs:
74
+ beta_start: 0.00085
75
+ beta_end: 0.012
76
+ beta_schedule: "linear"
77
+ steps_offset: 1
78
+ clip_sample: False
79
+
80
+ # train_data:
81
+ # csv_train_path_list: ["/mnt/workspace/workgroup/video_datasets/WebVid/meta/results_2M_train.csv"]
82
+ # csv_test_path_list: ["/mnt/workspace/workgroup/video_datasets/WebVid/meta/results_2M_train.csv"]
83
+ # json_train_path_list: ["../layer_diff_dataset/video/video_all.json"]
84
+ # json_test_path_list: ["../layer_diff_dataset/video/video_test.json"]
85
+ # video_train_folder_list: ["../layervid/MeViS/valid_u"]
86
+ video_train_folder_list: ['../data/DIS-TR']
87
+ # video_train_folder_list: ['../data/DIS-TR','../data/MULAN','../data/AM-2k/train',"../data/video_dataset/YoutubeVOS/train"]
88
+ # video_test_folder_list: ["../layervid/MeViS/valid_u"]
89
+ video_test_folder_list: ["../data/video_dataset/YoutubeVOS/train"]
90
+ # mask_train_folder_list: ["/mnt/workspace/workgroup/yuanpeng/videoedit/webvid/Segment-and-Track-Anything/savemask/WebVid_short"]
91
+ # mask_test_folder_list: ["/mnt/workspace/workgroup/yuanpeng/videoedit/webvid/Segment-and-Track-Anything/savemask/WebVid_short"]
92
+ train_json_name: "meta_v5.json"
93
+ test_json_name: "meta_user_study.json"
94
+ img_size: [256,256]
95
+ sample_stride: 1
96
+ sample_n_frames: 16
97
+ num_frames: 16
98
+
99
+ # adapter_lora_path: "../AnimateDiff/models/Motion_Module/v3_sd15_adapter.ckpt"
100
+ adapter_lora_path: ""
101
+ dreambooth_model_path: "../AnimateDiff/models/DreamBooth_LoRA/realisticVisionV51_v51VAE.safetensors"
102
+ # dreambooth_model_path: ""
103
+ lora_model_path: ""
104
+ inference_config: "configs/inference/inference-v3.yaml"
105
+ motion_module_path: "../AnimateDiff/models/Motion_Module/v3_sd15_mm.ckpt"
106
+ controlnet_config: "configs/inference/sparsectrl/latent_condition.yaml"
107
+ controlnet_path: "../AnimateDiff/models/SparseCtrl/v3_sd15_sparsectrl_rgb.ckpt"
108
+ motion_module_lora_configs: []
109
+
110
+ list_vae: ["encoder.mid_block.attentions.0.key.bias", "encoder.mid_block.attentions.0.key.weight", "encoder.mid_block.attentions.0.proj_attn.bias", "encoder.mid_block.attentions.0.proj_attn.weight", "encoder.mid_block.attentions.0.query.bias", "encoder.mid_block.attentions.0.query.weight", "encoder.mid_block.attentions.0.value.bias", "encoder.mid_block.attentions.0.value.weight", "decoder.mid_block.attentions.0.key.bias", "decoder.mid_block.attentions.0.key.weight", "decoder.mid_block.attentions.0.proj_attn.bias", "decoder.mid_block.attentions.0.proj_attn.weight", "decoder.mid_block.attentions.0.query.bias", "decoder.mid_block.attentions.0.query.weight", "decoder.mid_block.attentions.0.value.bias", "decoder.mid_block.attentions.0.value.weight"]
111
+ frames: 3
112
+
113
+ enable_lora: True
114
+ motion_lora_rank: 4
115
+ spatial_lora_rank: 64
116
+ load_lora: True
117
+ # load_lora_exp_name: layer_diffusion_sd_ft_3_vid_v4.2_lora_1
118
+ load_lora_exp_name: layer_diffusion_sd_ft_3_vid_v4.2_new_bg_scale_v9_sm_v2.1
119
+ load_lora_epoch: 5
120
+ is_stage2: True
121
+
122
+ module_mapping_sd15: {
123
+ 0: 'down_blocks.0.attentions.0.transformer_blocks.0.attn1',
124
+ 1: 'down_blocks.0.attentions.0.transformer_blocks.0.attn2',
125
+ 2: 'down_blocks.0.attentions.1.transformer_blocks.0.attn1',
126
+ 3: 'down_blocks.0.attentions.1.transformer_blocks.0.attn2',
127
+ 4: 'down_blocks.1.attentions.0.transformer_blocks.0.attn1',
128
+ 5: 'down_blocks.1.attentions.0.transformer_blocks.0.attn2',
129
+ 6: 'down_blocks.1.attentions.1.transformer_blocks.0.attn1',
130
+ 7: 'down_blocks.1.attentions.1.transformer_blocks.0.attn2',
131
+ 8: 'down_blocks.2.attentions.0.transformer_blocks.0.attn1',
132
+ 9: 'down_blocks.2.attentions.0.transformer_blocks.0.attn2',
133
+ 10: 'down_blocks.2.attentions.1.transformer_blocks.0.attn1',
134
+ 11: 'down_blocks.2.attentions.1.transformer_blocks.0.attn2',
135
+ 12: 'up_blocks.1.attentions.0.transformer_blocks.0.attn1',
136
+ 13: 'up_blocks.1.attentions.0.transformer_blocks.0.attn2',
137
+ 14: 'up_blocks.1.attentions.1.transformer_blocks.0.attn1',
138
+ 15: 'up_blocks.1.attentions.1.transformer_blocks.0.attn2',
139
+ 16: 'up_blocks.1.attentions.2.transformer_blocks.0.attn1',
140
+ 17: 'up_blocks.1.attentions.2.transformer_blocks.0.attn2',
141
+ 18: 'up_blocks.2.attentions.0.transformer_blocks.0.attn1',
142
+ 19: 'up_blocks.2.attentions.0.transformer_blocks.0.attn2',
143
+ 20: 'up_blocks.2.attentions.1.transformer_blocks.0.attn1',
144
+ 21: 'up_blocks.2.attentions.1.transformer_blocks.0.attn2',
145
+ 22: 'up_blocks.2.attentions.2.transformer_blocks.0.attn1',
146
+ 23: 'up_blocks.2.attentions.2.transformer_blocks.0.attn2',
147
+ 24: 'up_blocks.3.attentions.0.transformer_blocks.0.attn1',
148
+ 25: 'up_blocks.3.attentions.0.transformer_blocks.0.attn2',
149
+ 26: 'up_blocks.3.attentions.1.transformer_blocks.0.attn1',
150
+ 27: 'up_blocks.3.attentions.1.transformer_blocks.0.attn2',
151
+ 28: 'up_blocks.3.attentions.2.transformer_blocks.0.attn1',
152
+ 29: 'up_blocks.3.attentions.2.transformer_blocks.0.attn2',
153
+ 30: 'mid_block.attentions.0.transformer_blocks.0.attn1',
154
+ 31: 'mid_block.attentions.0.transformer_blocks.0.attn2'
155
+ }