Aduc-sdr committed on
Commit
75a908a
·
verified ·
1 Parent(s): 85b856b

Upload main.yaml

Browse files
Files changed (1) hide show
  1. main.yaml +88 -0
main.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __object__:
2
+ path: projects.video_diffusion_sr.train
3
+ name: VideoDiffusionTrainer
4
+
5
+ dit:
6
+ model:
7
+ __object__:
8
+ path: models.dit_v2.nadit
9
+ name: NaDiT
10
+ args: as_params
11
+ vid_in_channels: 33
12
+ vid_out_channels: 16
13
+ vid_dim: 2560
14
+ vid_out_norm: fusedrms
15
+ txt_in_dim: 5120
16
+ txt_in_norm: fusedln
17
+ txt_dim: ${.vid_dim}
18
+ emb_dim: ${eval:'6 * ${.vid_dim}'}
19
+ heads: 20
20
+ head_dim: 128 # llm-like
21
+ expand_ratio: 4
22
+ norm: fusedrms
23
+ norm_eps: 1.0e-05
24
+ ada: single
25
+ qk_bias: False
26
+ qk_norm: fusedrms
27
+ patch_size: [ 1,2,2 ]
28
+ num_layers: 32 # llm-like
29
+ mm_layers: 10
30
+ mlp_type: swiglu
31
+ msa_type: None
32
+ block_type: ${eval:'${.num_layers} * ["mmdit_sr"]'} # space-full
33
+ window: ${eval:'${.num_layers} * [(4,3,3)]'} # space-full
34
+ window_method: ${eval:'${.num_layers} // 2 * ["720pwin_by_size_bysize","720pswin_by_size_bysize"]'} # space-full
35
+ rope_type: mmrope3d
36
+ rope_dim: 128
37
+ compile: False
38
+ gradient_checkpoint: True
39
+ fsdp:
40
+ sharding_strategy: _HYBRID_SHARD_ZERO2
41
+
42
+ ema:
43
+ decay: 0.9998
44
+
45
+ vae:
46
+ model:
47
+ __inherit__: models/video_vae_v3/s8_c16_t4_inflation_sd3.yaml
48
+ freeze_encoder: False
49
+ # gradient_checkpoint: True
50
+ slicing:
51
+ split_size: 4
52
+ memory_device: same
53
+ memory_limit:
54
+ conv_max_mem: 0.5
55
+ norm_max_mem: 0.5
56
+ checkpoint: ./ckpts/ema_vae.pth
57
+ scaling_factor: 0.9152
58
+ compile: False
59
+ grouping: False
60
+ dtype: bfloat16
61
+
62
+ diffusion:
63
+ schedule:
64
+ type: lerp
65
+ T: 1000.0
66
+ sampler:
67
+ type: euler
68
+ prediction_type: v_lerp
69
+ timesteps:
70
+ training:
71
+ type: logitnormal
72
+ loc: 0.0
73
+ scale: 1.0
74
+ sampling:
75
+ type: uniform_trailing
76
+ steps: 50
77
+ transform: True
78
+ loss:
79
+ type: v_lerp
80
+ cfg:
81
+ scale: 7.5
82
+ rescale: 0
83
+
84
+ condition:
85
+ i2v: 0.0
86
+ v2v: 0.0
87
+ sr: 1.0
88
+ noise_scale: 0.25