HappyP4nda committed on
Commit
55f3ab3
·
verified ·
1 Parent(s): bd546bf

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Semo/config/a2m/a2m_t1d128.yaml +18 -0
  2. Semo/config/a2m/a2m_t1d128_posepre.yaml +29 -0
  3. Semo/config/a2m/a2m_t1d512.yaml +18 -0
  4. Semo/config/a2m/a2m_t1d512_doubleref.yaml +18 -0
  5. Semo/config/a2m/a2m_t1d512_pose.yaml +24 -0
  6. Semo/config/a2m/a2m_t1d512_posepre.yaml +29 -0
  7. Semo/config/a2m/a2m_t1d64.yaml +18 -0
  8. Semo/config/a2m/a2m_t1d64_posepre.yaml +29 -0
  9. Semo/config/a2m/a2m_t2d256.yaml +18 -0
  10. Semo/config/a2m/a2m_t2d256_pose.yaml +24 -0
  11. Semo/config/a2m/a2m_t2d256_posepre.yaml +29 -0
  12. Semo/config/a2m/cross_audio_pose_t1d512_l16_dim1024.yaml +25 -0
  13. Semo/config/a2m/cross_audio_pose_t1d512_l64_dim1024.yaml +25 -0
  14. Semo/config/a2m/cross_audio_pose_t2d256_l16_dim1024.yaml +25 -0
  15. Semo/config/a2m/cross_audio_pose_t4d128_l16_dim1024.yaml +25 -0
  16. Semo/config/a2m/cross_audio_pose_t4d128_l32_dim1024.yaml +25 -0
  17. Semo/config/a2m/cross_audio_posepre_t1d512_l16_dim1024.yaml +30 -0
  18. Semo/config/a2m/cross_audio_posepre_t1d512_l32_dim1024.yaml +30 -0
  19. Semo/config/a2m/cross_audio_posepre_t1d512_l64_dim1024.yaml +30 -0
  20. Semo/config/a2m/cross_audio_posepre_t2d256_l16_dim1024.yaml +30 -0
  21. Semo/config/a2m/cross_audio_posepre_t4d128_l16_dim1024.yaml +30 -0
  22. Semo/config/a2m/cross_audio_t1d512_l16_dim1024.yaml +19 -0
  23. Semo/config/a2m/cross_audio_t2d256_l16_dim1024.yaml +19 -0
  24. Semo/config/a2m/cross_audio_t4d128_l16_dim1024.yaml +19 -0
  25. Semo/config/a2m/cross_audio_t4d128_l32_dim1024.yaml +19 -0
  26. Semo/config/accelerate_config_1.yaml +9 -0
  27. Semo/config/accelerate_config_2.yaml +9 -0
  28. Semo/config/accelerate_config_3.yaml +9 -0
  29. Semo/config/accelerate_config_4.yaml +9 -0
  30. Semo/config/accelerate_config_5.yaml +9 -0
  31. Semo/config/accelerate_config_6.yaml +9 -0
  32. Semo/config/accelerate_config_7.yaml +9 -0
  33. Semo/config/accelerate_config_8.yaml +9 -0
  34. Semo/config/inference/a2m.yaml +12 -0
  35. Semo/config/inference/a2m_wpose.yaml +12 -0
  36. Semo/config/inference/amd-s-t1-d1024-spatial-ablation.yaml +6 -0
  37. Semo/config/inference/amd-s-t1-d128-spatial-ablation.yaml +6 -0
  38. Semo/config/inference/amd-s-t1-d256-spatial-ablation.yaml +6 -0
  39. Semo/config/inference/amd-s-t1-d32-spatial-ablation.yaml +6 -0
  40. Semo/config/inference/amd-s-t1-d512-nonorm-spatial-mask25.yaml +6 -0
  41. Semo/config/inference/amd-s-t1-d512-nonorm-spatial-mask50.yaml +6 -0
  42. Semo/config/inference/amd-s-t1-d512-nonorm-spatial-mask75.yaml +6 -0
  43. Semo/config/inference/amd-s-t1-d512-nonorm-spatial-mask90.yaml +6 -0
  44. Semo/config/inference/amd-s-t1-d512-spatial-ablation.yaml +6 -0
  45. Semo/config/inference/amd-s-t1-d64-spatial-ablation.yaml +6 -0
  46. Semo/config/inference/amd-s-t1-d768-spatial-ablation.yaml +6 -0
  47. Semo/config/inference/amd-s-t8-d64-spatial-ablation.yaml +6 -0
  48. Semo/config/inference/p2m.yaml +8 -0
  49. Semo/config/inference/rec.yaml +5 -0
  50. Semo/config/inference/rec_facevid.yaml +5 -0
Semo/config/a2m/a2m_t1d128.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 128
8
+ num_step : 1000
9
+
10
+ # ----------- Audio feature encoder -----------
11
+ intermediate_dim : 1024
12
+ window_size : 32
13
+ encoder_out_dim : 1024
14
+
15
+ # ----------- Diffusion Transformer -----------
16
+ diffusion_attn_head_dim : 64
17
+ diffusion_attn_num_heads : 16
18
+ diffusion_num_layers : 8
Semo/config/a2m/a2m_t1d128_posepre.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_PosePre # 532M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 128
8
+ num_step : 1000
9
+
10
+ # ----------- pose --------
11
+ pose_height : 32
12
+ pose_width : 32
13
+ pose_inchannel : 4
14
+ pose_patch_size : 2
15
+
16
+ # ----------- pose predictor --------
17
+ pose_predictor_attn_head_dim : 64
18
+ pose_predictor_attn_num_heads : 8
19
+ pose_predictor_attn_num_layers : 4
20
+
21
+ # ----------- Audio feature encoder -----------
22
+ intermediate_dim : 1024
23
+ window_size : 32
24
+ encoder_out_dim : 1024
25
+
26
+ # ----------- Diffusion Transformer -----------
27
+ diffusion_attn_head_dim : 64
28
+ diffusion_attn_num_heads : 16
29
+ diffusion_num_layers : 8
Semo/config/a2m/a2m_t1d512.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 512
8
+ num_step : 1000
9
+
10
+ # ----------- Audio feature encoder -----------
11
+ intermediate_dim : 1024
12
+ window_size : 32
13
+ encoder_out_dim : 1024
14
+
15
+ # ----------- Diffusion Transformer -----------
16
+ diffusion_attn_head_dim : 64
17
+ diffusion_attn_num_heads : 16
18
+ diffusion_num_layers : 8
Semo/config/a2m/a2m_t1d512_doubleref.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_DoubleRef # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 512
8
+ num_step : 1000
9
+
10
+ # ----------- Audio feature encoder -----------
11
+ intermediate_dim : 1024
12
+ window_size : 32
13
+ encoder_out_dim : 1024
14
+
15
+ # ----------- Diffusion Transformer -----------
16
+ diffusion_attn_head_dim : 64
17
+ diffusion_attn_num_heads : 16
18
+ diffusion_num_layers : 8
Semo/config/a2m/a2m_t1d512_pose.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_Pose # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 512
8
+ num_step : 1000
9
+
10
+ # ----------- pose --------
11
+ pose_height : 32
12
+ pose_width : 32
13
+ pose_inchannel : 4
14
+ pose_patch_size : 2
15
+
16
+ # ----------- Audio feature encoder -----------
17
+ intermediate_dim : 1024
18
+ window_size : 32
19
+ encoder_out_dim : 1024
20
+
21
+ # ----------- Diffusion Transformer -----------
22
+ diffusion_attn_head_dim : 64
23
+ diffusion_attn_num_heads : 16
24
+ diffusion_num_layers : 8
Semo/config/a2m/a2m_t1d512_posepre.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_PosePre # 532M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 512
8
+ num_step : 1000
9
+
10
+ # ----------- pose --------
11
+ pose_height : 32
12
+ pose_width : 32
13
+ pose_inchannel : 4
14
+ pose_patch_size : 2
15
+
16
+ # ----------- pose predictor --------
17
+ pose_predictor_attn_head_dim : 64
18
+ pose_predictor_attn_num_heads : 8
19
+ pose_predictor_attn_num_layers : 4
20
+
21
+ # ----------- Audio feature encoder -----------
22
+ intermediate_dim : 1024
23
+ window_size : 32
24
+ encoder_out_dim : 1024
25
+
26
+ # ----------- Diffusion Transformer -----------
27
+ diffusion_attn_head_dim : 64
28
+ diffusion_attn_num_heads : 16
29
+ diffusion_num_layers : 8
Semo/config/a2m/a2m_t1d64.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 64
8
+ num_step : 1000
9
+
10
+ # ----------- Audio feature encoder -----------
11
+ intermediate_dim : 1024
12
+ window_size : 32
13
+ encoder_out_dim : 1024
14
+
15
+ # ----------- Diffusion Transformer -----------
16
+ diffusion_attn_head_dim : 64
17
+ diffusion_attn_num_heads : 16
18
+ diffusion_num_layers : 8
Semo/config/a2m/a2m_t1d64_posepre.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_PosePre # 532M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 64
8
+ num_step : 1000
9
+
10
+ # ----------- pose --------
11
+ pose_height : 32
12
+ pose_width : 32
13
+ pose_inchannel : 4
14
+ pose_patch_size : 2
15
+
16
+ # ----------- pose predictor --------
17
+ pose_predictor_attn_head_dim : 64
18
+ pose_predictor_attn_num_heads : 8
19
+ pose_predictor_attn_num_layers : 4
20
+
21
+ # ----------- Audio feature encoder -----------
22
+ intermediate_dim : 1024
23
+ window_size : 32
24
+ encoder_out_dim : 1024
25
+
26
+ # ----------- Diffusion Transformer -----------
27
+ diffusion_attn_head_dim : 64
28
+ diffusion_attn_num_heads : 16
29
+ diffusion_num_layers : 8
Semo/config/a2m/a2m_t2d256.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 2
7
+ motion_in_channel : 256
8
+ num_step : 1000
9
+
10
+ # ----------- Audio feature encoder -----------
11
+ intermediate_dim : 1024
12
+ window_size : 32
13
+ encoder_out_dim : 1024
14
+
15
+ # ----------- Diffusion Transformer -----------
16
+ diffusion_attn_head_dim : 64
17
+ diffusion_attn_num_heads : 16
18
+ diffusion_num_layers : 8
Semo/config/a2m/a2m_t2d256_pose.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_Pose # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 2
7
+ motion_in_channel : 256
8
+ num_step : 1000
9
+
10
+ # ----------- pose --------
11
+ pose_height : 32
12
+ pose_width : 32
13
+ pose_inchannel : 4
14
+ pose_patch_size : 2
15
+
16
+ # ----------- Audio feature encoder -----------
17
+ intermediate_dim : 1024
18
+ window_size : 32
19
+ encoder_out_dim : 1024
20
+
21
+ # ----------- Diffusion Transformer -----------
22
+ diffusion_attn_head_dim : 64
23
+ diffusion_attn_num_heads : 16
24
+ diffusion_num_layers : 8
Semo/config/a2m/a2m_t2d256_posepre.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_PosePre # 532M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 2
7
+ motion_in_channel : 256
8
+ num_step : 1000
9
+
10
+ # ----------- pose --------
11
+ pose_height : 32
12
+ pose_width : 32
13
+ pose_inchannel : 4
14
+ pose_patch_size : 2
15
+
16
+ # ----------- pose predictor --------
17
+ pose_predictor_attn_head_dim : 64
18
+ pose_predictor_attn_num_heads : 8
19
+ pose_predictor_attn_num_layers : 4
20
+
21
+ # ----------- Audio feature encoder -----------
22
+ intermediate_dim : 1024
23
+ window_size : 32
24
+ encoder_out_dim : 1024
25
+
26
+ # ----------- Diffusion Transformer -----------
27
+ diffusion_attn_head_dim : 64
28
+ diffusion_attn_num_heads : 16
29
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_pose_t1d512_l16_dim1024.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_Pose # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 512
8
+ motion_frames : 16
9
+ num_step : 1000
10
+
11
+ # ----------- pose --------
12
+ pose_height : 32
13
+ pose_width : 32
14
+ pose_inchannel : 4
15
+ pose_patch_size : 2
16
+
17
+ # ----------- Audio feature encoder -----------
18
+ intermediate_dim : 1024
19
+ window_size : 32
20
+ encoder_out_dim : 1024
21
+
22
+ # ----------- Diffusion Transformer -----------
23
+ diffusion_attn_head_dim : 64
24
+ diffusion_attn_num_heads : 16
25
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_pose_t1d512_l64_dim1024.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_Pose # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 512
8
+ motion_frames : 64
9
+ num_step : 1000
10
+
11
+ # ----------- pose --------
12
+ pose_height : 32
13
+ pose_width : 32
14
+ pose_inchannel : 4
15
+ pose_patch_size : 2
16
+
17
+ # ----------- Audio feature encoder -----------
18
+ intermediate_dim : 1024
19
+ window_size : 32
20
+ encoder_out_dim : 1024
21
+
22
+ # ----------- Diffusion Transformer -----------
23
+ diffusion_attn_head_dim : 64
24
+ diffusion_attn_num_heads : 16
25
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_pose_t2d256_l16_dim1024.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_Pose # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 2
7
+ motion_in_channel : 256
8
+ motion_frames : 16
9
+ num_step : 1000
10
+
11
+ # ----------- pose --------
12
+ pose_height : 32
13
+ pose_width : 32
14
+ pose_inchannel : 4
15
+ pose_patch_size : 2
16
+
17
+ # ----------- Audio feature encoder -----------
18
+ intermediate_dim : 1024
19
+ window_size : 32
20
+ encoder_out_dim : 1024
21
+
22
+ # ----------- Diffusion Transformer -----------
23
+ diffusion_attn_head_dim : 64
24
+ diffusion_attn_num_heads : 16
25
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_pose_t4d128_l16_dim1024.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_Pose # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 4
7
+ motion_in_channel : 128
8
+ motion_frames : 16
9
+ num_step : 1000
10
+
11
+ # ----------- pose --------
12
+ pose_height : 32
13
+ pose_width : 32
14
+ pose_inchannel : 4
15
+ pose_patch_size : 2
16
+
17
+ # ----------- Audio feature encoder -----------
18
+ intermediate_dim : 1024
19
+ window_size : 32
20
+ encoder_out_dim : 1024
21
+
22
+ # ----------- Diffusion Transformer -----------
23
+ diffusion_attn_head_dim : 64
24
+ diffusion_attn_num_heads : 16
25
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_pose_t4d128_l32_dim1024.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_Pose # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 4
7
+ motion_in_channel : 128
8
+ motion_frames : 32
9
+ num_step : 1000
10
+
11
+ # ----------- pose --------
12
+ pose_height : 32
13
+ pose_width : 32
14
+ pose_inchannel : 4
15
+ pose_patch_size : 2
16
+
17
+ # ----------- Audio feature encoder -----------
18
+ intermediate_dim : 1024
19
+ window_size : 32
20
+ encoder_out_dim : 1024
21
+
22
+ # ----------- Diffusion Transformer -----------
23
+ diffusion_attn_head_dim : 64
24
+ diffusion_attn_num_heads : 16
25
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_posepre_t1d512_l16_dim1024.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_PosePre # 532M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 512
8
+ motion_frames : 16
9
+ num_step : 1000
10
+
11
+ # ----------- pose --------
12
+ pose_height : 32
13
+ pose_width : 32
14
+ pose_inchannel : 4
15
+ pose_patch_size : 2
16
+
17
+ # ----------- pose predictor --------
18
+ pose_predictor_attn_head_dim : 64
19
+ pose_predictor_attn_num_heads : 8
20
+ pose_predictor_attn_num_layers : 4
21
+
22
+ # ----------- Audio feature encoder -----------
23
+ intermediate_dim : 1024
24
+ window_size : 32
25
+ encoder_out_dim : 1024
26
+
27
+ # ----------- Diffusion Transformer -----------
28
+ diffusion_attn_head_dim : 64
29
+ diffusion_attn_num_heads : 16
30
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_posepre_t1d512_l32_dim1024.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_PosePre # 532M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 512
8
+ motion_frames : 32
9
+ num_step : 1000
10
+
11
+ # ----------- pose --------
12
+ pose_height : 32
13
+ pose_width : 32
14
+ pose_inchannel : 4
15
+ pose_patch_size : 2
16
+
17
+ # ----------- pose predictor --------
18
+ pose_predictor_attn_head_dim : 64
19
+ pose_predictor_attn_num_heads : 8
20
+ pose_predictor_attn_num_layers : 4
21
+
22
+ # ----------- Audio feature encoder -----------
23
+ intermediate_dim : 1024
24
+ window_size : 32
25
+ encoder_out_dim : 1024
26
+
27
+ # ----------- Diffusion Transformer -----------
28
+ diffusion_attn_head_dim : 64
29
+ diffusion_attn_num_heads : 16
30
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_posepre_t1d512_l64_dim1024.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_PosePre # 532M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 512
8
+ motion_frames : 64
9
+ num_step : 1000
10
+
11
+ # ----------- pose --------
12
+ pose_height : 32
13
+ pose_width : 32
14
+ pose_inchannel : 4
15
+ pose_patch_size : 2
16
+
17
+ # ----------- pose predictor --------
18
+ pose_predictor_attn_head_dim : 64
19
+ pose_predictor_attn_num_heads : 8
20
+ pose_predictor_attn_num_layers : 4
21
+
22
+ # ----------- Audio feature encoder -----------
23
+ intermediate_dim : 1024
24
+ window_size : 32
25
+ encoder_out_dim : 1024
26
+
27
+ # ----------- Diffusion Transformer -----------
28
+ diffusion_attn_head_dim : 64
29
+ diffusion_attn_num_heads : 16
30
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_posepre_t2d256_l16_dim1024.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_PosePre # 532M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 2
7
+ motion_in_channel : 256
8
+ motion_frames : 16
9
+ num_step : 1000
10
+
11
+ # ----------- pose --------
12
+ pose_height : 32
13
+ pose_width : 32
14
+ pose_inchannel : 4
15
+ pose_patch_size : 2
16
+
17
+ # ----------- pose predictor --------
18
+ pose_predictor_attn_head_dim : 64
19
+ pose_predictor_attn_num_heads : 8
20
+ pose_predictor_attn_num_layers : 4
21
+
22
+ # ----------- Audio feature encoder -----------
23
+ intermediate_dim : 1024
24
+ window_size : 32
25
+ encoder_out_dim : 1024
26
+
27
+ # ----------- Diffusion Transformer -----------
28
+ diffusion_attn_head_dim : 64
29
+ diffusion_attn_num_heads : 16
30
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_posepre_t4d128_l16_dim1024.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio_PosePre # 532M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 4
7
+ motion_in_channel : 128
8
+ motion_frames : 16
9
+ num_step : 1000
10
+
11
+ # ----------- pose --------
12
+ pose_height : 32
13
+ pose_width : 32
14
+ pose_inchannel : 4
15
+ pose_patch_size : 2
16
+
17
+ # ----------- pose predictor --------
18
+ pose_predictor_attn_head_dim : 64
19
+ pose_predictor_attn_num_heads : 8
20
+ pose_predictor_attn_num_layers : 4
21
+
22
+ # ----------- Audio feature encoder -----------
23
+ intermediate_dim : 1024
24
+ window_size : 32
25
+ encoder_out_dim : 1024
26
+
27
+ # ----------- Diffusion Transformer -----------
28
+ diffusion_attn_head_dim : 64
29
+ diffusion_attn_num_heads : 16
30
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_t1d512_l16_dim1024.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 1
7
+ motion_in_channel : 512
8
+ motion_frames : 16
9
+ num_step : 1000
10
+
11
+ # ----------- Audio feature encoder -----------
12
+ intermediate_dim : 1024
13
+ window_size : 32
14
+ encoder_out_dim : 1024
15
+
16
+ # ----------- Diffusion Transformer -----------
17
+ diffusion_attn_head_dim : 64
18
+ diffusion_attn_num_heads : 16
19
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_t2d256_l16_dim1024.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 2
7
+ motion_in_channel : 256
8
+ motion_frames : 16
9
+ num_step : 1000
10
+
11
+ # ----------- Audio feature encoder -----------
12
+ intermediate_dim : 1024
13
+ window_size : 32
14
+ encoder_out_dim : 1024
15
+
16
+ # ----------- Diffusion Transformer -----------
17
+ diffusion_attn_head_dim : 64
18
+ diffusion_attn_num_heads : 16
19
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_t4d128_l16_dim1024.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 4
7
+ motion_in_channel : 128
8
+ motion_frames : 16
9
+ num_step : 1000
10
+
11
+ # ----------- Audio feature encoder -----------
12
+ intermediate_dim : 1024
13
+ window_size : 32
14
+ encoder_out_dim : 1024
15
+
16
+ # ----------- Diffusion Transformer -----------
17
+ diffusion_attn_head_dim : 64
18
+ diffusion_attn_num_heads : 16
19
+ diffusion_num_layers : 8
Semo/config/a2m/cross_audio_t4d128_l32_dim1024.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type : A2MModel_CrossAtten_Audio # 200M : 20M + 180M
2
+ model:
3
+ audio_inchannel : 384
4
+ audio_block : 50
5
+
6
+ motion_num_token : 4
7
+ motion_in_channel : 128
8
+ motion_frames : 32
9
+ num_step : 1000
10
+
11
+ # ----------- Audio feature encoder -----------
12
+ intermediate_dim : 1024
13
+ window_size : 32
14
+ encoder_out_dim : 1024
15
+
16
+ # ----------- Diffusion Transformer -----------
17
+ diffusion_attn_head_dim : 64
18
+ diffusion_attn_num_heads : 16
19
+ diffusion_num_layers : 8
Semo/config/accelerate_config_1.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ distributed_type: NO
3
+ fsdp_config: {}
4
+ machine_rank: 0
5
+ main_training_function: main
6
+ num_machines: 1
7
+ num_processes: 1
8
+ gpu_ids: 0,
9
+ use_cpu: false
Semo/config/accelerate_config_2.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ distributed_type: MULTI_GPU
3
+ fsdp_config: {}
4
+ machine_rank: 0
5
+ main_training_function: main
6
+ num_machines: 1
7
+ num_processes: 2
8
+ gpu_ids: 0,1
9
+ use_cpu: false
Semo/config/accelerate_config_3.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ distributed_type: MULTI_GPU
3
+ fsdp_config: {}
4
+ machine_rank: 0
5
+ main_training_function: main
6
+ num_machines: 1
7
+ num_processes: 3
8
+ gpu_ids: 0,1,2
9
+ use_cpu: false
Semo/config/accelerate_config_4.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ distributed_type: MULTI_GPU
3
+ fsdp_config: {}
4
+ machine_rank: 0
5
+ main_training_function: main
6
+ num_machines: 1
7
+ num_processes: 4
8
+ gpu_ids: 0,1,2,3
9
+ use_cpu: false
Semo/config/accelerate_config_5.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ distributed_type: MULTI_GPU
3
+ fsdp_config: {}
4
+ machine_rank: 0
5
+ main_training_function: main
6
+ num_machines: 1
7
+ num_processes: 5
8
+ gpu_ids: 0,1,2,3,4
9
+ use_cpu: false
Semo/config/accelerate_config_6.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ distributed_type: MULTI_GPU
3
+ fsdp_config: {}
4
+ machine_rank: 0
5
+ main_training_function: main
6
+ num_machines: 1
7
+ num_processes: 6
8
+ gpu_ids: 0,1,2,3,4,5
9
+ use_cpu: false
Semo/config/accelerate_config_7.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ distributed_type: MULTI_GPU
3
+ fsdp_config: {}
4
+ machine_rank: 0
5
+ main_training_function: main
6
+ num_machines: 1
7
+ num_processes: 7
8
+ gpu_ids: 0,1,2,3,4,5,6
9
+ use_cpu: false
Semo/config/accelerate_config_8.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ distributed_type: MULTI_GPU
3
+ fsdp_config: {}
4
+ machine_rank: 0
5
+ main_training_function: main
6
+ num_machines: 1
7
+ num_processes: 8
8
+ gpu_ids: 0,1,2,3,4,5,6,7
9
+ use_cpu: false
Semo/config/inference/a2m.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ whisper_model_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/pretrained_weights/whisper_tiny.pt
2
+ audio_separator_model_file: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/pretrained_weights/Kim_Vocal_2.onnx
3
+ cache_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/sample/vocals
4
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
5
+ a2m_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/config/a2m/a2m_t1d512.yaml
6
+ a2m_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/pretrained_weights/a2m.safetensors
7
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial/config.json
8
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial/checkpoints/checkpoint-131000/model.safetensors
9
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/amd_sample/sample
10
+ enable_pose: false
11
+ a2m_sample_steps: 4
12
+ amd_sample_steps: 4
Semo/config/inference/a2m_wpose.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ whisper_model_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/pretrained_weights/whisper_tiny.pt
2
+ audio_separator_model_file: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/pretrained_weights/Kim_Vocal_2.onnx
3
+ cache_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/sample/vocals
4
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
5
+ a2m_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/config/a2m/a2m_t1d512_posepre.yaml
6
+ a2m_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/a2m/a2m-t1d512-f16-posepre-spatial/checkpoints/checkpoint-103000/model.safetensors
7
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial/config.json
8
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial/checkpoints/checkpoint-131000/model.safetensors
9
+ a2m_sample_steps: 4
10
+ amd_sample_steps: 4
11
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/sample
12
+ enable_pose: true
Semo/config/inference/amd-s-t1-d1024-spatial-ablation.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d1024-spatial-ablation/checkpoints/checkpoint-104000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d1024-spatial-ablation/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d1024-spatial-ablation/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t1-d128-spatial-ablation.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d128-spatial-ablation/checkpoints/checkpoint-485000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d128-spatial-ablation/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d128-spatial-ablation/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t1-d256-spatial-ablation.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d256-spatial-ablation/checkpoints/checkpoint-104000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d256-spatial-ablation/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d256-spatial-ablation/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t1-d32-spatial-ablation.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d32-spatial-ablation/checkpoints/checkpoint-365000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d32-spatial-ablation/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d32-spatial-ablation/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t1-d512-nonorm-spatial-mask25.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial-mask25/checkpoints/checkpoint-88000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial-mask25/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d512-nonorm-spatial-mask25/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t1-d512-nonorm-spatial-mask50.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial-mask50/checkpoints/checkpoint-111000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial-mask50/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d512-nonorm-spatial-mask50/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t1-d512-nonorm-spatial-mask75.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial-mask75/checkpoints/checkpoint-189000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial-mask75/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d512-nonorm-spatial-mask75/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t1-d512-nonorm-spatial-mask90.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial-mask90/checkpoints/checkpoint-110000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial-mask90/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d512-nonorm-spatial-mask90/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t1-d512-spatial-ablation.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d512-spatial-ablation/checkpoints/checkpoint-140000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d512-spatial-ablation/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d512-spatial-ablation/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t1-d64-spatial-ablation.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d64-spatial-ablation/checkpoints/checkpoint-378000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d64-spatial-ablation/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d64-spatial-ablation/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t1-d768-spatial-ablation.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d768-spatial-ablation/checkpoints/checkpoint-102000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t1-d768-spatial-ablation/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t1-d768-spatial-ablation/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/amd-s-t8-d64-spatial-ablation.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t8-d64-spatial-ablation/checkpoints/checkpoint-103000/model.safetensors
2
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd_ablation/amd-s-t8-d64-spatial-ablation/config.json
3
+ amd_sample_steps: 20
4
+ mask_ratio: 0.0
5
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/amd-s-t8-d64-spatial-ablation/result/mead
6
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
Semo/config/inference/p2m.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
2
+ p2m_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/config/p2m/p2m_t1d512.yaml
3
+ p2m_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/pretrained_weights/p2m.safetensors
4
+ amd_config_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial/config.json
5
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/newexp/amd/amd-s-t1-d512-nonorm-spatial/checkpoints/checkpoint-131000/model.safetensors
6
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/sample
7
+ p2m_sample_steps: 4
8
+ amd_sample_steps: 4
Semo/config/inference/rec.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
2
+ amd_config_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/exp/newexp/amd_ablation/amd-s-t1-d512-doubleref-ablation/config.json
3
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/pretrained_weights/rec.safetensors
4
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/sample/reconstruction
5
+ amd_sample_steps: 4
Semo/config/inference/rec_facevid.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ vae_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/model-checkpoints/sd-vae-ft-mse
2
+ amd_config_path: /mnt/pfs-mc0p4k/tts/team/digital_avatar_group/sunwenzhang/qiyuan/exp/newexp/amd_ablation/amd-s-t1-d512-doubleref-ablation/config.json
3
+ amd_ckpt_path: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/pretrained_weights/rec.safetensors
4
+ output_dir: /mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/sample/reconstruction/facevid_step20
5
+ amd_sample_steps: 20