yuegao committed on
Commit
4bd2d52
·
verified ·
1 Parent(s): ff014ac

Upload folder using huggingface_hub

Browse files
Files changed (19) hide show
  1. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_ball_i2v_fix3samples-10-28-17-06/10000-ema/mp_rank_00_model_states.pt +3 -0
  2. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_ball_i2v_fix3samples-10-28-17-06/10000/mp_rank_00_model_states.pt +3 -0
  3. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_ball_i2v_fix3samples-10-28-17-06/latest +1 -0
  4. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_ball_i2v_fix3samples-10-28-17-06/model_config.json +4 -0
  5. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_ball_i2v_fix3samples-10-28-17-06/training_config.yaml +219 -0
  6. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/.DS_Store +0 -0
  7. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/10000-ema/mp_rank_00_model_states.pt +3 -0
  8. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/10000/mp_rank_00_model_states.pt +3 -0
  9. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/latest +1 -0
  10. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/model_config.json +4 -0
  11. cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/training_config.yaml +219 -0
  12. cogvideox_lora_ckpts/lora_cogvidx5b_scalar_flow_i2v_fix3samples-09-09-05-39/10000-ema/mp_rank_00_model_states.pt +3 -0
  13. cogvideox_lora_ckpts/lora_cogvidx5b_scalar_flow_i2v_fix3samples-09-09-05-39/10000/mp_rank_00_model_states.pt +3 -0
  14. cogvideox_lora_ckpts/lora_cogvidx5b_scalar_flow_i2v_fix3samples-09-09-05-39/latest +1 -0
  15. cogvideox_lora_ckpts/lora_cogvidx5b_scalar_flow_i2v_fix3samples-09-09-05-39/model_config.json +4 -0
  16. cogvideox_lora_ckpts/lora_cogvidx5b_scalar_flow_i2v_fix3samples-09-09-05-39/training_config.yaml +219 -0
  17. zero123_finetune_logs/2024-04-24T14-04-16_scalar_flow/checkpoints/step=000015499.ckpt +3 -0
  18. zero123_finetune_logs/2024-10-28T18-26-21_fluid_nexus_ball/checkpoints/step=000079999.ckpt +3 -0
  19. zero123_finetune_logs/2024-10-30T06-31-08_fluid_nexus_smoke/checkpoints/step=000051999.ckpt +3 -0
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_ball_i2v_fix3samples-10-28-17-06/10000-ema/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7076e10568d7aceb600582ed7e537d7ea921b404f97a95e769b832dfdec7e7f
3
+ size 42463648157
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_ball_i2v_fix3samples-10-28-17-06/10000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f5c3a9db5de4978b30c1d53ea7cd0847a67dbf4b87071d29eb11720133acb9c
3
+ size 42463648157
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_ball_i2v_fix3samples-10-28-17-06/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ 10000
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_ball_i2v_fix3samples-10-28-17-06/model_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "model_class": "SATVideoDiffusionEngine",
3
+ "model_parallel_size": 1
4
+ }
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_ball_i2v_fix3samples-10-28-17-06/training_config.yaml ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ scale_factor: 0.7
3
+ disable_first_stage_autocast: true
4
+ not_trainable_prefixes:
5
+ - all
6
+ log_keys:
7
+ - txt
8
+ denoiser_config:
9
+ target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
10
+ params:
11
+ num_idx: 1000
12
+ quantize_c_noise: false
13
+ weighting_config:
14
+ target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
15
+ scaling_config:
16
+ target: sgm.modules.diffusionmodules.denoiser_scaling.VideoScaling
17
+ discretization_config:
18
+ target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
19
+ params:
20
+ shift_scale: 1.0
21
+ network_config:
22
+ target: dit_video_concat.DiffusionTransformer
23
+ params:
24
+ time_embed_dim: 512
25
+ elementwise_affine: true
26
+ num_frames: 49
27
+ time_compressed_rate: 4
28
+ latent_width: 90
29
+ latent_height: 60
30
+ num_layers: 42
31
+ patch_size: 2
32
+ in_channels: 16
33
+ out_channels: 16
34
+ hidden_size: 3072
35
+ adm_in_channels: 256
36
+ num_attention_heads: 48
37
+ transformer_args:
38
+ checkpoint_activations: true
39
+ vocab_size: 1
40
+ max_sequence_length: 64
41
+ layernorm_order: pre
42
+ skip_init: false
43
+ model_parallel_size: 1
44
+ is_decoder: false
45
+ modules:
46
+ pos_embed_config:
47
+ target: dit_video_concat.Rotary3DPositionEmbeddingMixin
48
+ params:
49
+ hidden_size_head: 64
50
+ text_length: 226
51
+ lora_config:
52
+ target: sat.model.finetune.lora2.LoraMixin
53
+ params:
54
+ r: 128
55
+ patch_embed_config:
56
+ target: dit_video_concat.ImagePatchEmbeddingMixin
57
+ params:
58
+ text_hidden_size: 4096
59
+ adaln_layer_config:
60
+ target: dit_video_concat.AdaLNMixin
61
+ params:
62
+ qk_ln: true
63
+ final_layer_config:
64
+ target: dit_video_concat.FinalLayerMixin
65
+ conditioner_config:
66
+ target: sgm.modules.GeneralConditioner
67
+ params:
68
+ emb_models:
69
+ - is_trainable: false
70
+ input_key: txt
71
+ ucg_rate: 0.1
72
+ target: sgm.modules.encoders.modules.FrozenT5Embedder
73
+ params:
74
+ model_dir: /path/to/FluidNexusRoot/cogvideox-sat/2b/t5-v1_1-xxl
75
+ max_length: 226
76
+ first_stage_config:
77
+ target: vae_modules.autoencoder.VideoAutoencoderInferenceWrapper
78
+ params:
79
+ cp_size: 1
80
+ ckpt_path: /path/to/FluidNexusRoot/cogvideox-sat/5b/vae/3d-vae.pt
81
+ ignore_keys:
82
+ - loss
83
+ loss_config:
84
+ target: torch.nn.Identity
85
+ regularizer_config:
86
+ target: vae_modules.regularizers.DiagonalGaussianRegularizer
87
+ encoder_config:
88
+ target: vae_modules.cp_enc_dec.ContextParallelEncoder3D
89
+ params:
90
+ double_z: true
91
+ z_channels: 16
92
+ resolution: 256
93
+ in_channels: 3
94
+ out_ch: 3
95
+ ch: 128
96
+ ch_mult:
97
+ - 1
98
+ - 2
99
+ - 2
100
+ - 4
101
+ attn_resolutions: []
102
+ num_res_blocks: 3
103
+ dropout: 0.0
104
+ gather_norm: true
105
+ decoder_config:
106
+ target: vae_modules.cp_enc_dec.ContextParallelDecoder3D
107
+ params:
108
+ double_z: true
109
+ z_channels: 16
110
+ resolution: 256
111
+ in_channels: 3
112
+ out_ch: 3
113
+ ch: 128
114
+ ch_mult:
115
+ - 1
116
+ - 2
117
+ - 2
118
+ - 4
119
+ attn_resolutions: []
120
+ num_res_blocks: 3
121
+ dropout: 0.0
122
+ gather_norm: false
123
+ loss_fn_config:
124
+ target: sgm.modules.diffusionmodules.loss.VideoDiffusionLoss
125
+ params:
126
+ fixed_frames: 3
127
+ offset_noise_level: 0
128
+ sigma_sampler_config:
129
+ target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
130
+ params:
131
+ uniform_sampling: true
132
+ num_idx: 1000
133
+ discretization_config:
134
+ target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
135
+ params:
136
+ shift_scale: 1.0
137
+ sampler_config:
138
+ target: sgm.modules.diffusionmodules.sampling.VPSDEDPMPP2MSampler
139
+ params:
140
+ num_steps: 50
141
+ verbose: true
142
+ discretization_config:
143
+ target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
144
+ params:
145
+ shift_scale: 1.0
146
+ guider_config:
147
+ target: sgm.modules.diffusionmodules.guiders.DynamicCFG
148
+ params:
149
+ scale: 6
150
+ exp: 5
151
+ num_steps: 50
152
+ args:
153
+ checkpoint_activations: true
154
+ model_parallel_size: 1
155
+ experiment_name: lora_cogvidx5b_realcapture_blackbluecloudredball_i2v_fix3samples
156
+ mode: finetune
157
+ load: /path/to/FluidNexusRoot/cogvideox-sat/5b/transformer
158
+ no_load_rng: true
159
+ train_iters: 10000
160
+ eval_iters: 1
161
+ eval_interval: 1000
162
+ eval_batch_size: 1
163
+ save: /path/to/FluidNexusRoot/cogvideox_lora_ckpts
164
+ summary_dir: /path/to/FluidNexusRoot/cogvideox_lora_runs
165
+ save_interval: 500
166
+ log_interval: 100
167
+ train_data:
168
+ - /dev/shm/RealCaptureBlackBlueCloudRedBallSetData_cogvideox_dataset
169
+ valid_data:
170
+ - /dev/shm/RealCaptureBlackBlueCloudRedBallSetData_cogvideox_dataset_sub_235
171
+ split: 1,0,0
172
+ num_workers: 8
173
+ force_train: true
174
+ only_log_video_latents: false
175
+ data:
176
+ target: data_video.SFTDataset
177
+ params:
178
+ video_size:
179
+ - 480
180
+ - 720
181
+ fps: 8
182
+ max_num_frames: 49
183
+ skip_frms_num: 0
184
+ deepspeed:
185
+ train_micro_batch_size_per_gpu: 2
186
+ gradient_accumulation_steps: 1
187
+ steps_per_print: 50
188
+ gradient_clipping: 0.1
189
+ zero_optimization:
190
+ stage: 2
191
+ cpu_offload: false
192
+ contiguous_gradients: false
193
+ overlap_comm: true
194
+ reduce_scatter: true
195
+ reduce_bucket_size: 1000000000
196
+ allgather_bucket_size: 1000000000
197
+ load_from_fp32_weights: false
198
+ zero_allow_untested_optimizer: true
199
+ bf16:
200
+ enabled: true
201
+ fp16:
202
+ enabled: false
203
+ loss_scale: 0
204
+ loss_scale_window: 400
205
+ hysteresis: 2
206
+ min_loss_scale: 1
207
+ optimizer:
208
+ type: sat.ops.FusedEmaAdam
209
+ params:
210
+ lr: 0.001
211
+ betas:
212
+ - 0.9
213
+ - 0.95
214
+ eps: 1.0e-08
215
+ weight_decay: 0.0001
216
+ activation_checkpointing:
217
+ partition_activations: false
218
+ contiguous_memory_optimization: false
219
+ wall_clock_breakdown: false
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/.DS_Store ADDED
Binary file (6.15 kB). View file
 
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/10000-ema/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:576a198a53cb3b73b2549e1da890071c23ecd3ae8f7394f5c96e789351e54e60
3
+ size 42463648157
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/10000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13c8790a90eb62fbb115438a141d87695f452f2d4f8edf518c3621d46c3344e6
3
+ size 42463648157
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ 10000
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/model_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "model_class": "SATVideoDiffusionEngine",
3
+ "model_parallel_size": 1
4
+ }
cogvideox_lora_ckpts/lora_cogvidx5b_fluid_nexus_smoke_i2v_fix3samples-10-28-17-06/training_config.yaml ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ scale_factor: 0.7
3
+ disable_first_stage_autocast: true
4
+ not_trainable_prefixes:
5
+ - all
6
+ log_keys:
7
+ - txt
8
+ denoiser_config:
9
+ target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
10
+ params:
11
+ num_idx: 1000
12
+ quantize_c_noise: false
13
+ weighting_config:
14
+ target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
15
+ scaling_config:
16
+ target: sgm.modules.diffusionmodules.denoiser_scaling.VideoScaling
17
+ discretization_config:
18
+ target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
19
+ params:
20
+ shift_scale: 1.0
21
+ network_config:
22
+ target: dit_video_concat.DiffusionTransformer
23
+ params:
24
+ time_embed_dim: 512
25
+ elementwise_affine: true
26
+ num_frames: 49
27
+ time_compressed_rate: 4
28
+ latent_width: 90
29
+ latent_height: 60
30
+ num_layers: 42
31
+ patch_size: 2
32
+ in_channels: 16
33
+ out_channels: 16
34
+ hidden_size: 3072
35
+ adm_in_channels: 256
36
+ num_attention_heads: 48
37
+ transformer_args:
38
+ checkpoint_activations: true
39
+ vocab_size: 1
40
+ max_sequence_length: 64
41
+ layernorm_order: pre
42
+ skip_init: false
43
+ model_parallel_size: 1
44
+ is_decoder: false
45
+ modules:
46
+ pos_embed_config:
47
+ target: dit_video_concat.Rotary3DPositionEmbeddingMixin
48
+ params:
49
+ hidden_size_head: 64
50
+ text_length: 226
51
+ lora_config:
52
+ target: sat.model.finetune.lora2.LoraMixin
53
+ params:
54
+ r: 128
55
+ patch_embed_config:
56
+ target: dit_video_concat.ImagePatchEmbeddingMixin
57
+ params:
58
+ text_hidden_size: 4096
59
+ adaln_layer_config:
60
+ target: dit_video_concat.AdaLNMixin
61
+ params:
62
+ qk_ln: true
63
+ final_layer_config:
64
+ target: dit_video_concat.FinalLayerMixin
65
+ conditioner_config:
66
+ target: sgm.modules.GeneralConditioner
67
+ params:
68
+ emb_models:
69
+ - is_trainable: false
70
+ input_key: txt
71
+ ucg_rate: 0.1
72
+ target: sgm.modules.encoders.modules.FrozenT5Embedder
73
+ params:
74
+ model_dir: /path/to/FluidNexusRoot/cogvideox-sat/2b/t5-v1_1-xxl
75
+ max_length: 226
76
+ first_stage_config:
77
+ target: vae_modules.autoencoder.VideoAutoencoderInferenceWrapper
78
+ params:
79
+ cp_size: 1
80
+ ckpt_path: /path/to/FluidNexusRoot/cogvideox-sat/5b/vae/3d-vae.pt
81
+ ignore_keys:
82
+ - loss
83
+ loss_config:
84
+ target: torch.nn.Identity
85
+ regularizer_config:
86
+ target: vae_modules.regularizers.DiagonalGaussianRegularizer
87
+ encoder_config:
88
+ target: vae_modules.cp_enc_dec.ContextParallelEncoder3D
89
+ params:
90
+ double_z: true
91
+ z_channels: 16
92
+ resolution: 256
93
+ in_channels: 3
94
+ out_ch: 3
95
+ ch: 128
96
+ ch_mult:
97
+ - 1
98
+ - 2
99
+ - 2
100
+ - 4
101
+ attn_resolutions: []
102
+ num_res_blocks: 3
103
+ dropout: 0.0
104
+ gather_norm: true
105
+ decoder_config:
106
+ target: vae_modules.cp_enc_dec.ContextParallelDecoder3D
107
+ params:
108
+ double_z: true
109
+ z_channels: 16
110
+ resolution: 256
111
+ in_channels: 3
112
+ out_ch: 3
113
+ ch: 128
114
+ ch_mult:
115
+ - 1
116
+ - 2
117
+ - 2
118
+ - 4
119
+ attn_resolutions: []
120
+ num_res_blocks: 3
121
+ dropout: 0.0
122
+ gather_norm: false
123
+ loss_fn_config:
124
+ target: sgm.modules.diffusionmodules.loss.VideoDiffusionLoss
125
+ params:
126
+ fixed_frames: 3
127
+ offset_noise_level: 0
128
+ sigma_sampler_config:
129
+ target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
130
+ params:
131
+ uniform_sampling: true
132
+ num_idx: 1000
133
+ discretization_config:
134
+ target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
135
+ params:
136
+ shift_scale: 1.0
137
+ sampler_config:
138
+ target: sgm.modules.diffusionmodules.sampling.VPSDEDPMPP2MSampler
139
+ params:
140
+ num_steps: 50
141
+ verbose: true
142
+ discretization_config:
143
+ target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
144
+ params:
145
+ shift_scale: 1.0
146
+ guider_config:
147
+ target: sgm.modules.diffusionmodules.guiders.DynamicCFG
148
+ params:
149
+ scale: 6
150
+ exp: 5
151
+ num_steps: 50
152
+ args:
153
+ checkpoint_activations: true
154
+ model_parallel_size: 1
155
+ experiment_name: lora_cogvidx5b_realcapture_blackbluecloud_i2v_fix3samples
156
+ mode: finetune
157
+ load: /path/to/FluidNexusRoot/cogvideox-sat/5b/transformer
158
+ no_load_rng: true
159
+ train_iters: 10000
160
+ eval_iters: 1
161
+ eval_interval: 1000
162
+ eval_batch_size: 1
163
+ save: /path/to/FluidNexusRoot/cogvideox_lora_ckpts
164
+ summary_dir: /path/to/FluidNexusRoot/cogvideox_lora_runs
165
+ save_interval: 500
166
+ log_interval: 100
167
+ train_data:
168
+ - /dev/shm/RealCaptureBlackBlueCloudSetData_cogvideox_dataset
169
+ valid_data:
170
+ - /dev/shm/RealCaptureBlackBlueCloudSetData_cogvideox_dataset_sub_235
171
+ split: 1,0,0
172
+ num_workers: 8
173
+ force_train: true
174
+ only_log_video_latents: false
175
+ data:
176
+ target: data_video.SFTDataset
177
+ params:
178
+ video_size:
179
+ - 480
180
+ - 720
181
+ fps: 8
182
+ max_num_frames: 49
183
+ skip_frms_num: 0
184
+ deepspeed:
185
+ train_micro_batch_size_per_gpu: 2
186
+ gradient_accumulation_steps: 1
187
+ steps_per_print: 50
188
+ gradient_clipping: 0.1
189
+ zero_optimization:
190
+ stage: 2
191
+ cpu_offload: false
192
+ contiguous_gradients: false
193
+ overlap_comm: true
194
+ reduce_scatter: true
195
+ reduce_bucket_size: 1000000000
196
+ allgather_bucket_size: 1000000000
197
+ load_from_fp32_weights: false
198
+ zero_allow_untested_optimizer: true
199
+ bf16:
200
+ enabled: true
201
+ fp16:
202
+ enabled: false
203
+ loss_scale: 0
204
+ loss_scale_window: 400
205
+ hysteresis: 2
206
+ min_loss_scale: 1
207
+ optimizer:
208
+ type: sat.ops.FusedEmaAdam
209
+ params:
210
+ lr: 0.001
211
+ betas:
212
+ - 0.9
213
+ - 0.95
214
+ eps: 1.0e-08
215
+ weight_decay: 0.0001
216
+ activation_checkpointing:
217
+ partition_activations: false
218
+ contiguous_memory_optimization: false
219
+ wall_clock_breakdown: false
cogvideox_lora_ckpts/lora_cogvidx5b_scalar_flow_i2v_fix3samples-09-09-05-39/10000-ema/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca32a23c842129b42bbbd15021e6487c342fa22dddd7d9af9a58bceb3e1df378
3
+ size 42463648157
cogvideox_lora_ckpts/lora_cogvidx5b_scalar_flow_i2v_fix3samples-09-09-05-39/10000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a484a53f48e0ea059a7ab4e5df3ca5330fcf6f1adcdf3111593e3d55aea0416c
3
+ size 42463648157
cogvideox_lora_ckpts/lora_cogvidx5b_scalar_flow_i2v_fix3samples-09-09-05-39/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ 10000
cogvideox_lora_ckpts/lora_cogvidx5b_scalar_flow_i2v_fix3samples-09-09-05-39/model_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "model_class": "SATVideoDiffusionEngine",
3
+ "model_parallel_size": 1
4
+ }
cogvideox_lora_ckpts/lora_cogvidx5b_scalar_flow_i2v_fix3samples-09-09-05-39/training_config.yaml ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ scale_factor: 0.7
3
+ disable_first_stage_autocast: true
4
+ not_trainable_prefixes:
5
+ - all
6
+ log_keys:
7
+ - txt
8
+ denoiser_config:
9
+ target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
10
+ params:
11
+ num_idx: 1000
12
+ quantize_c_noise: false
13
+ weighting_config:
14
+ target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
15
+ scaling_config:
16
+ target: sgm.modules.diffusionmodules.denoiser_scaling.VideoScaling
17
+ discretization_config:
18
+ target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
19
+ params:
20
+ shift_scale: 1.0
21
+ network_config:
22
+ target: dit_video_concat.DiffusionTransformer
23
+ params:
24
+ time_embed_dim: 512
25
+ elementwise_affine: true
26
+ num_frames: 49
27
+ time_compressed_rate: 4
28
+ latent_width: 90
29
+ latent_height: 60
30
+ num_layers: 42
31
+ patch_size: 2
32
+ in_channels: 16
33
+ out_channels: 16
34
+ hidden_size: 3072
35
+ adm_in_channels: 256
36
+ num_attention_heads: 48
37
+ transformer_args:
38
+ checkpoint_activations: true
39
+ vocab_size: 1
40
+ max_sequence_length: 64
41
+ layernorm_order: pre
42
+ skip_init: false
43
+ model_parallel_size: 1
44
+ is_decoder: false
45
+ modules:
46
+ pos_embed_config:
47
+ target: dit_video_concat.Rotary3DPositionEmbeddingMixin
48
+ params:
49
+ hidden_size_head: 64
50
+ text_length: 226
51
+ lora_config:
52
+ target: sat.model.finetune.lora2.LoraMixin
53
+ params:
54
+ r: 128
55
+ patch_embed_config:
56
+ target: dit_video_concat.ImagePatchEmbeddingMixin
57
+ params:
58
+ text_hidden_size: 4096
59
+ adaln_layer_config:
60
+ target: dit_video_concat.AdaLNMixin
61
+ params:
62
+ qk_ln: true
63
+ final_layer_config:
64
+ target: dit_video_concat.FinalLayerMixin
65
+ conditioner_config:
66
+ target: sgm.modules.GeneralConditioner
67
+ params:
68
+ emb_models:
69
+ - is_trainable: false
70
+ input_key: txt
71
+ ucg_rate: 0.1
72
+ target: sgm.modules.encoders.modules.FrozenT5Embedder
73
+ params:
74
+ model_dir: /path/to/FluidNexusRoot/cogvideox-sat/2b/t5-v1_1-xxl
75
+ max_length: 226
76
+ first_stage_config:
77
+ target: vae_modules.autoencoder.VideoAutoencoderInferenceWrapper
78
+ params:
79
+ cp_size: 1
80
+ ckpt_path: /path/to/FluidNexusRoot/cogvideox-sat/5b/vae/3d-vae.pt
81
+ ignore_keys:
82
+ - loss
83
+ loss_config:
84
+ target: torch.nn.Identity
85
+ regularizer_config:
86
+ target: vae_modules.regularizers.DiagonalGaussianRegularizer
87
+ encoder_config:
88
+ target: vae_modules.cp_enc_dec.ContextParallelEncoder3D
89
+ params:
90
+ double_z: true
91
+ z_channels: 16
92
+ resolution: 256
93
+ in_channels: 3
94
+ out_ch: 3
95
+ ch: 128
96
+ ch_mult:
97
+ - 1
98
+ - 2
99
+ - 2
100
+ - 4
101
+ attn_resolutions: []
102
+ num_res_blocks: 3
103
+ dropout: 0.0
104
+ gather_norm: true
105
+ decoder_config:
106
+ target: vae_modules.cp_enc_dec.ContextParallelDecoder3D
107
+ params:
108
+ double_z: true
109
+ z_channels: 16
110
+ resolution: 256
111
+ in_channels: 3
112
+ out_ch: 3
113
+ ch: 128
114
+ ch_mult:
115
+ - 1
116
+ - 2
117
+ - 2
118
+ - 4
119
+ attn_resolutions: []
120
+ num_res_blocks: 3
121
+ dropout: 0.0
122
+ gather_norm: false
123
+ loss_fn_config:
124
+ target: sgm.modules.diffusionmodules.loss.VideoDiffusionLoss
125
+ params:
126
+ fixed_frames: 3
127
+ offset_noise_level: 0
128
+ sigma_sampler_config:
129
+ target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
130
+ params:
131
+ uniform_sampling: true
132
+ num_idx: 1000
133
+ discretization_config:
134
+ target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
135
+ params:
136
+ shift_scale: 1.0
137
+ sampler_config:
138
+ target: sgm.modules.diffusionmodules.sampling.VPSDEDPMPP2MSampler
139
+ params:
140
+ num_steps: 50
141
+ verbose: true
142
+ discretization_config:
143
+ target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
144
+ params:
145
+ shift_scale: 1.0
146
+ guider_config:
147
+ target: sgm.modules.diffusionmodules.guiders.DynamicCFG
148
+ params:
149
+ scale: 6
150
+ exp: 5
151
+ num_steps: 50
152
+ args:
153
+ checkpoint_activations: true
154
+ model_parallel_size: 1
155
+ experiment_name: lora_cogvidx5b_scalarflow_all_i2v_fix3samples
156
+ mode: finetune
157
+ load: /path/to/FluidNexusRoot/cogvideox-sat/5b/transformer
158
+ no_load_rng: true
159
+ train_iters: 10000
160
+ eval_iters: 1
161
+ eval_interval: 1000
162
+ eval_batch_size: 1
163
+ save: /path/to/FluidNexusRoot/cogvideox_lora_ckpts
164
+ summary_dir: /path/to/FluidNexusRoot/cogvideox_lora_runs
165
+ save_interval: 500
166
+ log_interval: 100
167
+ train_data:
168
+ - /dev/shm/ScalarFlow_cogvideox_dataset
169
+ valid_data:
170
+ - /dev/shm/ScalarFlow_cogvideox_dataset_sub_90
171
+ split: 1,0,0
172
+ num_workers: 8
173
+ force_train: true
174
+ only_log_video_latents: false
175
+ data:
176
+ target: data_video.SFTDataset
177
+ params:
178
+ video_size:
179
+ - 480
180
+ - 720
181
+ fps: 8
182
+ max_num_frames: 49
183
+ skip_frms_num: 0
184
+ deepspeed:
185
+ train_micro_batch_size_per_gpu: 2
186
+ gradient_accumulation_steps: 1
187
+ steps_per_print: 50
188
+ gradient_clipping: 0.1
189
+ zero_optimization:
190
+ stage: 2
191
+ cpu_offload: false
192
+ contiguous_gradients: false
193
+ overlap_comm: true
194
+ reduce_scatter: true
195
+ reduce_bucket_size: 1000000000
196
+ allgather_bucket_size: 1000000000
197
+ load_from_fp32_weights: false
198
+ zero_allow_untested_optimizer: true
199
+ bf16:
200
+ enabled: true
201
+ fp16:
202
+ enabled: false
203
+ loss_scale: 0
204
+ loss_scale_window: 400
205
+ hysteresis: 2
206
+ min_loss_scale: 1
207
+ optimizer:
208
+ type: sat.ops.FusedEmaAdam
209
+ params:
210
+ lr: 0.001
211
+ betas:
212
+ - 0.9
213
+ - 0.95
214
+ eps: 1.0e-08
215
+ weight_decay: 0.0001
216
+ activation_checkpointing:
217
+ partition_activations: false
218
+ contiguous_memory_optimization: false
219
+ wall_clock_breakdown: false
zero123_finetune_logs/2024-04-24T14-04-16_scalar_flow/checkpoints/step=000015499.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79155ecf6818fa9c813d3eaf12d91a13a5d57b6c8845ca1ec3b1e0a72c828a49
3
+ size 15465966043
zero123_finetune_logs/2024-10-28T18-26-21_fluid_nexus_ball/checkpoints/step=000079999.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54b7685a157f2b602312cf438106f8d1269bb3e8cfcfe8b5b11c45230c14818d
3
+ size 15465966616
zero123_finetune_logs/2024-10-30T06-31-08_fluid_nexus_smoke/checkpoints/step=000051999.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:774267ba698138e2208fcb6b047797e51d6159a003131c9e07c67ec9de0710d3
3
+ size 15465974168