Tuyabei committed on
Commit
6d1b386
·
verified ·
1 Parent(s): 50f4dfb

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config_temporal.yaml +166 -0
  2. model.ckpt +3 -0
config_temporal.yaml ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ pretrained_checkpoint: /mnt/petrelfs/quxiaoye/tj/DynamiCrafter-dev/ckpt/cm64/model.ckpt
3
+ base_learning_rate: 1.0e-05
4
+ scale_lr: False
5
+ target: lvdm.models.ddpm3d.LatentVisualDiffusion
6
+ params:
7
+ rescale_betas_zero_snr: True
8
+ parameterization: "v"
9
+ linear_start: 0.00085
10
+ linear_end: 0.012
11
+ num_timesteps_cond: 1
12
+ log_every_t: 200
13
+ timesteps: 1000
14
+ first_stage_key: video
15
+ cond_stage_key: caption
16
+ cond_stage_trainable: False
17
+ image_proj_model_trainable: False
18
+ conditioning_key: hybrid
19
+ image_size: [40, 64]
20
+ channels: 4
21
+ scale_by_std: False
22
+ scale_factor: 0.18215
23
+ use_ema: False
24
+ uncond_prob: 0.05
25
+ uncond_type: 'empty_seq'
26
+ rand_cond_frame: false
27
+ use_dynamic_rescale: True
28
+ base_scale: 0.7
29
+ fps_condition_type: 'fps'
30
+ perframe_ae: True
31
+
32
+ unet_config:
33
+ target: lvdm.modules.networks.openaimodel3d.UNetModel
34
+ params:
35
+ in_channels: 8
36
+ out_channels: 4
37
+ model_channels: 320
38
+ attention_resolutions:
39
+ - 4
40
+ - 2
41
+ - 1
42
+ num_res_blocks: 2
43
+ channel_mult:
44
+ - 1
45
+ - 2
46
+ - 4
47
+ - 4
48
+ dropout: 0.1
49
+ num_head_channels: 64
50
+ transformer_depth: 1
51
+ context_dim: 1024
52
+ use_linear: True
53
+ use_checkpoint: True
54
+ temporal_conv: True
55
+ temporal_attention: True
56
+ temporal_selfatt_only: False
57
+ use_relative_position: False
58
+ use_causal_attention: False
59
+ temporal_length: 16
60
+ addition_attention: True
61
+ image_cross_attention: True
62
+ default_fs: 10
63
+ fs_condition: True
64
+
65
+ first_stage_config:
66
+ target: lvdm.models.autoencoder.AutoencoderKL
67
+ params:
68
+ embed_dim: 4
69
+ monitor: val/rec_loss
70
+ ddconfig:
71
+ double_z: True
72
+ z_channels: 4
73
+ resolution: 256
74
+ in_channels: 3
75
+ out_ch: 3
76
+ ch: 128
77
+ ch_mult:
78
+ - 1
79
+ - 2
80
+ - 4
81
+ - 4
82
+ num_res_blocks: 2
83
+ attn_resolutions: []
84
+ dropout: 0.0
85
+ lossconfig:
86
+ target: torch.nn.Identity
87
+
88
+ cond_stage_config:
89
+ target: lvdm.modules.encoders.condition.FrozenOpenCLIPEmbedder
90
+ params:
91
+ freeze: True
92
+ layer: "penultimate"
93
+
94
+ img_cond_stage_config:
95
+ target: lvdm.modules.encoders.condition.FrozenOpenCLIPImageEmbedderV2
96
+ params:
97
+ freeze: True
98
+
99
+ image_proj_stage_config:
100
+ target: lvdm.modules.encoders.resampler.Resampler
101
+ params:
102
+ dim: 1024
103
+ depth: 4
104
+ dim_head: 64
105
+ heads: 12
106
+ num_queries: 16
107
+ embedding_dim: 1280
108
+ output_dim: 1024
109
+ ff_mult: 4
110
+ video_length: 16
111
+
112
+ data:
113
+ target: utils_data.DataModuleFromConfig
114
+ params:
115
+ batch_size: 2
116
+ num_workers: 12
117
+ wrap: False
118
+ train:
119
+ target: lvdm.data.webvid.WebVid
120
+ params:
121
+ data_dir: /nvme/quxiaoye/webvid10m # <WebVid10M DATA>
122
+ meta_path: /mnt/petrelfs/quxiaoye/tj/data/dataset/WebMotion/webmotion-36k.csv #<.csv FILE>
123
+ video_length: 16
124
+ frame_stride: 6
125
+ load_raw_resolution: True
126
+ resolution: [320, 512]
127
+ spatial_transform: resize_center_crop
128
+ random_fs: True ## if True, we uniformly sample fs with max_fs=frame_stride (above)
129
+
130
+ lightning:
131
+ precision: 16
132
+ # strategy: deepspeed_stage_2
133
+ trainer:
134
+ benchmark: True
135
+ accumulate_grad_batches: 2
136
+ max_steps: 8000
137
+ # logger
138
+ log_every_n_steps: 50
139
+ # val
140
+ val_check_interval: 0.5
141
+ gradient_clip_algorithm: 'norm'
142
+ gradient_clip_val: 0.5
143
+ callbacks:
144
+ model_checkpoint:
145
+ target: pytorch_lightning.callbacks.ModelCheckpoint
146
+ params:
147
+ every_n_train_steps: 800 #1000
148
+ filename: "{epoch}-{step}"
149
+ save_weights_only: True
150
+ metrics_over_trainsteps_checkpoint:
151
+ target: pytorch_lightning.callbacks.ModelCheckpoint
152
+ params:
153
+ filename: '{epoch}-{step}'
154
+ save_weights_only: True
155
+ every_n_train_steps: 2000 #20000 # 3 s/step * 20,000 steps ≈ 16.7 h
156
+ batch_logger:
157
+ target: callbacks.ImageLogger
158
+ params:
159
+ batch_frequency: 200
160
+ to_local: False
161
+ max_images: 8
162
+ log_images_kwargs:
163
+ ddim_steps: 50
164
+ unconditional_guidance_scale: 7.5
165
+ timestep_spacing: uniform_trailing
166
+ guidance_rescale: 0.7
model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f302a4784f9e3118f46d0e9872870e172ca3d42ebdbb91e6073f2dc8ae5d5f8c
3
+ size 10558440205