mnhatdaous committed on
Commit
3b4f4ee
·
1 Parent(s): 0216954

update yamls

Browse files
dac-vae/base.yml CHANGED
@@ -19,12 +19,11 @@ discriminator:
19
  periods: [2, 3, 5, 7, 11]
20
  fft_sizes: [2048, 1024, 512]
21
  bands:
22
- - [0.0, 0.1]
23
- - [0.1, 0.25]
24
- - [0.25, 0.5]
25
- - [0.5, 0.75]
26
- - [0.75, 1.0]
27
-
28
 
29
  max_norm: 1000
30
  max_norm_d: 10
 
19
  periods: [2, 3, 5, 7, 11]
20
  fft_sizes: [2048, 1024, 512]
21
  bands:
22
+ - [0.0, 0.1]
23
+ - [0.1, 0.25]
24
+ - [0.25, 0.5]
25
+ - [0.5, 0.75]
26
+ - [0.75, 1.0]
 
27
 
28
  max_norm: 1000
29
  max_norm_d: 10
dac-vae/config.yml CHANGED
@@ -19,12 +19,11 @@ discriminator:
19
  periods: [2, 3, 5, 7, 11]
20
  fft_sizes: [2048, 1024, 512]
21
  bands:
22
- - [0.0, 0.1]
23
- - [0.1, 0.25]
24
- - [0.25, 0.5]
25
- - [0.5, 0.75]
26
- - [0.75, 1.0]
27
-
28
 
29
  max_norm: 1000
30
  max_norm_d: 10
 
19
  periods: [2, 3, 5, 7, 11]
20
  fft_sizes: [2048, 1024, 512]
21
  bands:
22
+ - [0.0, 0.1]
23
+ - [0.1, 0.25]
24
+ - [0.25, 0.5]
25
+ - [0.5, 0.75]
26
+ - [0.75, 1.0]
 
27
 
28
  max_norm: 1000
29
  max_norm_d: 10
flowae/configs/datasets/dae.yaml CHANGED
@@ -6,7 +6,7 @@ datasets:
6
  dataset:
7
  name: class_folder_audio
8
  args:
9
- root_path: "/home/masuser/minimax-audio/dataset/Emilia/EN"
10
  sample_rate: 24000
11
  duration: 0.38
12
  shuffle: true
@@ -19,14 +19,14 @@ datasets:
19
  batch_size: 52
20
  num_workers: 8
21
  drop_last: true
22
-
23
  val:
24
  name: wrapper_audio_cae
25
  args:
26
  dataset:
27
  name: class_folder_audio
28
  args:
29
- root_path: "/home/masuser/minimax-audio/dataset/libritts"
30
  sample_rate: 24000
31
  duration: 5.0
32
  shuffle: false
@@ -39,14 +39,14 @@ datasets:
39
  batch_size: 4
40
  num_workers: 8
41
  drop_last: false
42
-
43
  eval_ae:
44
  name: wrapper_audio_cae
45
  args:
46
  dataset:
47
  name: class_folder_audio
48
  args:
49
- root_path: "/home/masuser/minimax-audio/dataset/libritts"
50
  sample_rate: 24000
51
  duration: 5.0
52
  shuffle: false
@@ -67,4 +67,4 @@ eval_ae_max_samples: 100
67
  val_idx: [0, 1, 2, 3, 4, 5, 6, 7]
68
 
69
  # Enable autoencoder evaluation
70
- evaluate_ae: true
 
6
  dataset:
7
  name: class_folder_audio
8
  args:
9
+ root_path: '/home/masuser/minimax-audio/dataset/Emilia/EN'
10
  sample_rate: 24000
11
  duration: 0.38
12
  shuffle: true
 
19
  batch_size: 52
20
  num_workers: 8
21
  drop_last: true
22
+
23
  val:
24
  name: wrapper_audio_cae
25
  args:
26
  dataset:
27
  name: class_folder_audio
28
  args:
29
+ root_path: '/home/masuser/minimax-audio/dataset/libritts'
30
  sample_rate: 24000
31
  duration: 5.0
32
  shuffle: false
 
39
  batch_size: 4
40
  num_workers: 8
41
  drop_last: false
42
+
43
  eval_ae:
44
  name: wrapper_audio_cae
45
  args:
46
  dataset:
47
  name: class_folder_audio
48
  args:
49
+ root_path: '/home/masuser/minimax-audio/dataset/libritts'
50
  sample_rate: 24000
51
  duration: 5.0
52
  shuffle: false
 
67
  val_idx: [0, 1, 2, 3, 4, 5, 6, 7]
68
 
69
  # Enable autoencoder evaluation
70
+ evaluate_ae: true
flowae/configs/datasets/imagenet_ae.yaml CHANGED
@@ -4,7 +4,14 @@ datasets:
4
  args:
5
  dataset:
6
  name: class_folder
7
- args: {root_path: /home/masuser/minimax-audio/mnist_png/training, resize: 256, rand_crop: 256, rand_flip: true, image_only: true}
 
 
 
 
 
 
 
8
  resize_inp: 256
9
  gt_glores_lb: 256
10
  gt_glores_ub: 256
@@ -12,13 +19,19 @@ datasets:
12
  loader:
13
  batch_size: 14
14
  num_workers: 24
15
-
16
  val:
17
  name: wrapper_cae
18
  args:
19
  dataset:
20
  name: class_folder
21
- args: {root_path: /home/masuser/minimax-audio/mnist_png/testing, resize: 256, square_crop: true, image_only: true}
 
 
 
 
 
 
22
  resize_inp: 256
23
  gt_glores_lb: 256
24
  gt_glores_ub: 256
@@ -26,13 +39,19 @@ datasets:
26
  loader:
27
  batch_size: 14
28
  num_workers: 24
29
-
30
  eval_ae:
31
  name: wrapper_cae
32
  args:
33
  dataset:
34
  name: class_folder
35
- args: {root_path: /home/masuser/minimax-audio/mnist_png/testing, resize: 256, square_crop: true, image_only: true}
 
 
 
 
 
 
36
  resize_inp: 256
37
  gt_glores_lb: 256
38
  gt_glores_ub: 256
@@ -44,4 +63,4 @@ datasets:
44
 
45
  visualize_ae_dir: /mnt/nvme/dito
46
  visualize_ae_random_n_samples: 32
47
- eval_ae_max_samples: 5000
 
4
  args:
5
  dataset:
6
  name: class_folder
7
+ args:
8
+ {
9
+ root_path: /home/masuser/minimax-audio/mnist_png/training,
10
+ resize: 256,
11
+ rand_crop: 256,
12
+ rand_flip: true,
13
+ image_only: true,
14
+ }
15
  resize_inp: 256
16
  gt_glores_lb: 256
17
  gt_glores_ub: 256
 
19
  loader:
20
  batch_size: 14
21
  num_workers: 24
22
+
23
  val:
24
  name: wrapper_cae
25
  args:
26
  dataset:
27
  name: class_folder
28
+ args:
29
+ {
30
+ root_path: /home/masuser/minimax-audio/mnist_png/testing,
31
+ resize: 256,
32
+ square_crop: true,
33
+ image_only: true,
34
+ }
35
  resize_inp: 256
36
  gt_glores_lb: 256
37
  gt_glores_ub: 256
 
39
  loader:
40
  batch_size: 14
41
  num_workers: 24
42
+
43
  eval_ae:
44
  name: wrapper_cae
45
  args:
46
  dataset:
47
  name: class_folder
48
+ args:
49
+ {
50
+ root_path: /home/masuser/minimax-audio/mnist_png/testing,
51
+ resize: 256,
52
+ square_crop: true,
53
+ image_only: true,
54
+ }
55
  resize_inp: 256
56
  gt_glores_lb: 256
57
  gt_glores_ub: 256
 
63
 
64
  visualize_ae_dir: /mnt/nvme/dito
65
  visualize_ae_random_n_samples: 32
66
+ eval_ae_max_samples: 5000
flowae/configs/datasets/imagenet_zdm.yaml CHANGED
@@ -4,7 +4,14 @@ datasets:
4
  args:
5
  dataset:
6
  name: class_folder
7
- args: {root_path: /home/masuser/minimax-audio/mnist_png/training, resize: 256, square_crop: true, rand_flip: true, drop_label_p: 0.1}
 
 
 
 
 
 
 
8
  resize_inp: 256
9
  gt_glores_lb: 256
10
  gt_glores_ub: 256
@@ -12,13 +19,18 @@ datasets:
12
  loader:
13
  batch_size: 64
14
  num_workers: 24
15
-
16
  val:
17
  name: wrapper_cae
18
  args:
19
  dataset:
20
  name: class_folder
21
- args: {root_path: /home/masuser/minimax-audio/mnist_png/testing, resize: 256, square_crop: true}
 
 
 
 
 
22
  resize_inp: 256
23
  gt_glores_lb: 256
24
  gt_glores_ub: 256
@@ -26,13 +38,18 @@ datasets:
26
  loader:
27
  batch_size: 64
28
  num_workers: 24
29
-
30
  eval_zdm:
31
  name: wrapper_cae
32
  args:
33
  dataset:
34
  name: class_folder
35
- args: {root_path: /home/masuser/minimax-audio/mnist_png/testing, resize: 256, square_crop: true}
 
 
 
 
 
36
  resize_inp: 256
37
  gt_glores_lb: 256
38
  gt_glores_ub: 256
@@ -50,4 +67,4 @@ visualize_zdm_random_n_samples: 12
50
  visualize_zdm_batch_size: 6
51
  visualize_zdm_guidance_list: [4]
52
  visualize_zdm_denoising_file: null
53
- eval_zdm_max_samples: 5000
 
4
  args:
5
  dataset:
6
  name: class_folder
7
+ args:
8
+ {
9
+ root_path: /home/masuser/minimax-audio/mnist_png/training,
10
+ resize: 256,
11
+ square_crop: true,
12
+ rand_flip: true,
13
+ drop_label_p: 0.1,
14
+ }
15
  resize_inp: 256
16
  gt_glores_lb: 256
17
  gt_glores_ub: 256
 
19
  loader:
20
  batch_size: 64
21
  num_workers: 24
22
+
23
  val:
24
  name: wrapper_cae
25
  args:
26
  dataset:
27
  name: class_folder
28
+ args:
29
+ {
30
+ root_path: /home/masuser/minimax-audio/mnist_png/testing,
31
+ resize: 256,
32
+ square_crop: true,
33
+ }
34
  resize_inp: 256
35
  gt_glores_lb: 256
36
  gt_glores_ub: 256
 
38
  loader:
39
  batch_size: 64
40
  num_workers: 24
41
+
42
  eval_zdm:
43
  name: wrapper_cae
44
  args:
45
  dataset:
46
  name: class_folder
47
+ args:
48
+ {
49
+ root_path: /home/masuser/minimax-audio/mnist_png/testing,
50
+ resize: 256,
51
+ square_crop: true,
52
+ }
53
  resize_inp: 256
54
  gt_glores_lb: 256
55
  gt_glores_ub: 256
 
67
  visualize_zdm_batch_size: 6
68
  visualize_zdm_guidance_list: [4]
69
  visualize_zdm_denoising_file: null
70
+ eval_zdm_max_samples: 5000
flowae/configs/experiments/dito-B-audio.yaml CHANGED
@@ -8,21 +8,21 @@ model:
8
  # Encoder
9
  encoder:
10
  name: dac_encoder
11
- args: {config_name: snake}
12
-
13
  # Latent configuration - now fully convolutional
14
- z_channels: 64 # Number of latent channels
15
 
16
  zaug_p: 0.1
17
  zaug_decoding_loss_type: suffix
18
  zaug_zdm_diffusion:
19
  name: fm
20
- args: {timescale: 1000.0}
21
-
22
  # Decoder (identity for DiTo)
23
  decoder:
24
  name: identity
25
-
26
  # Renderer - Fully convolutional for dynamic duration
27
  renderer:
28
  name: fixres_renderer_wrapper
@@ -37,12 +37,11 @@ model:
37
  c2: 512
38
  pe_dim: 320
39
  t_dim: 1280
40
-
41
  # Diffusion configuration
42
  render_diffusion:
43
  name: fm
44
- args: {timescale: 1000.0}
45
-
46
- render_sampler: {name: fm_euler_sampler}
47
- render_n_steps: 50
48
 
 
 
 
8
  # Encoder
9
  encoder:
10
  name: dac_encoder
11
+ args: { config_name: snake }
12
+
13
  # Latent configuration - now fully convolutional
14
+ z_channels: 64 # Number of latent channels
15
 
16
  zaug_p: 0.1
17
  zaug_decoding_loss_type: suffix
18
  zaug_zdm_diffusion:
19
  name: fm
20
+ args: { timescale: 1000.0 }
21
+
22
  # Decoder (identity for DiTo)
23
  decoder:
24
  name: identity
25
+
26
  # Renderer - Fully convolutional for dynamic duration
27
  renderer:
28
  name: fixres_renderer_wrapper
 
37
  c2: 512
38
  pe_dim: 320
39
  t_dim: 1280
40
+
41
  # Diffusion configuration
42
  render_diffusion:
43
  name: fm
44
+ args: { timescale: 1000.0 }
 
 
 
45
 
46
+ render_sampler: { name: fm_euler_sampler }
47
+ render_n_steps: 50
flowae/configs/experiments/dito-B-f8c4-noise-sync.yaml CHANGED
@@ -7,8 +7,8 @@ model:
7
  args:
8
  encoder:
9
  name: vqgan_encoder
10
- args: {config_name: f8c4}
11
-
12
  z_shape: [64, 1, 1]
13
  z_layernorm: true
14
 
@@ -16,10 +16,10 @@ model:
16
  zaug_decoding_loss_type: suffix
17
  zaug_zdm_diffusion:
18
  name: fm
19
- args: {timescale: 1000.0}
20
-
21
- decoder: {name: identity}
22
-
23
  renderer:
24
  name: fixres_renderer_wrapper
25
  args:
@@ -33,11 +33,11 @@ model:
33
  c2: 512
34
  pe_dim: 320
35
  t_dim: 1280
36
-
37
  render_diffusion:
38
  name: fm
39
- args: {timescale: 1000.0}
40
- render_sampler: {name: fm_euler_sampler}
41
  render_n_steps: 50
42
 
43
  loss_config: {}
 
7
  args:
8
  encoder:
9
  name: vqgan_encoder
10
+ args: { config_name: f8c4 }
11
+
12
  z_shape: [64, 1, 1]
13
  z_layernorm: true
14
 
 
16
  zaug_decoding_loss_type: suffix
17
  zaug_zdm_diffusion:
18
  name: fm
19
+ args: { timescale: 1000.0 }
20
+
21
+ decoder: { name: identity }
22
+
23
  renderer:
24
  name: fixres_renderer_wrapper
25
  args:
 
33
  c2: 512
34
  pe_dim: 320
35
  t_dim: 1280
36
+
37
  render_diffusion:
38
  name: fm
39
+ args: { timescale: 1000.0 }
40
+ render_sampler: { name: fm_euler_sampler }
41
  render_n_steps: 50
42
 
43
  loss_config: {}
flowae/configs/experiments/dito-B-f8c4.yaml CHANGED
@@ -7,13 +7,13 @@ model:
7
  args:
8
  encoder:
9
  name: vqgan_encoder
10
- args: {config_name: f8c4}
11
-
12
  z_shape: [4, 32, 32]
13
  z_layernorm: true
14
-
15
- decoder: {name: identity}
16
-
17
  renderer:
18
  name: fixres_renderer_wrapper
19
  args:
@@ -27,11 +27,11 @@ model:
27
  c2: 512
28
  pe_dim: 320
29
  t_dim: 1280
30
-
31
  render_diffusion:
32
  name: fm
33
- args: {timescale: 1000.0}
34
- render_sampler: {name: fm_euler_sampler}
35
  render_n_steps: 50
36
 
37
  loss_config: {}
 
7
  args:
8
  encoder:
9
  name: vqgan_encoder
10
+ args: { config_name: f8c4 }
11
+
12
  z_shape: [4, 32, 32]
13
  z_layernorm: true
14
+
15
+ decoder: { name: identity }
16
+
17
  renderer:
18
  name: fixres_renderer_wrapper
19
  args:
 
27
  c2: 512
28
  pe_dim: 320
29
  t_dim: 1280
30
+
31
  render_diffusion:
32
  name: fm
33
+ args: { timescale: 1000.0 }
34
+ render_sampler: { name: fm_euler_sampler }
35
  render_n_steps: 50
36
 
37
  loss_config: {}
flowae/configs/experiments/dito-L-f8c4.yaml CHANGED
@@ -7,13 +7,13 @@ model:
7
  args:
8
  encoder:
9
  name: vqgan_encoder
10
- args: {config_name: f8c4}
11
-
12
  z_shape: [4, 32, 32]
13
  z_layernorm: true
14
-
15
- decoder: {name: identity}
16
-
17
  renderer:
18
  name: fixres_renderer_wrapper
19
  args:
@@ -27,11 +27,11 @@ model:
27
  c2: 768
28
  pe_dim: 320
29
  t_dim: 1280
30
-
31
  render_diffusion:
32
  name: fm
33
- args: {timescale: 1000.0}
34
- render_sampler: {name: fm_euler_sampler}
35
  render_n_steps: 50
36
 
37
  loss_config: {}
 
7
  args:
8
  encoder:
9
  name: vqgan_encoder
10
+ args: { config_name: f8c4 }
11
+
12
  z_shape: [4, 32, 32]
13
  z_layernorm: true
14
+
15
+ decoder: { name: identity }
16
+
17
  renderer:
18
  name: fixres_renderer_wrapper
19
  args:
 
27
  c2: 768
28
  pe_dim: 320
29
  t_dim: 1280
30
+
31
  render_diffusion:
32
  name: fm
33
+ args: { timescale: 1000.0 }
34
+ render_sampler: { name: fm_euler_sampler }
35
  render_n_steps: 50
36
 
37
  loss_config: {}
flowae/configs/experiments/dito-XL-f8c4-noise-sync.yaml CHANGED
@@ -7,8 +7,8 @@ model:
7
  args:
8
  encoder:
9
  name: vqgan_encoder
10
- args: {config_name: f8c4}
11
-
12
  z_shape: [4, 32, 32]
13
  z_layernorm: true
14
 
@@ -16,10 +16,10 @@ model:
16
  zaug_decoding_loss_type: suffix
17
  zaug_zdm_diffusion:
18
  name: fm
19
- args: {timescale: 1000.0}
20
-
21
- decoder: {name: identity}
22
-
23
  renderer:
24
  name: fixres_renderer_wrapper
25
  args:
@@ -33,11 +33,11 @@ model:
33
  c2: 1024
34
  pe_dim: 320
35
  t_dim: 1280
36
-
37
  render_diffusion:
38
  name: fm
39
- args: {timescale: 1000.0}
40
- render_sampler: {name: fm_euler_sampler}
41
  render_n_steps: 50
42
 
43
  loss_config: {}
 
7
  args:
8
  encoder:
9
  name: vqgan_encoder
10
+ args: { config_name: f8c4 }
11
+
12
  z_shape: [4, 32, 32]
13
  z_layernorm: true
14
 
 
16
  zaug_decoding_loss_type: suffix
17
  zaug_zdm_diffusion:
18
  name: fm
19
+ args: { timescale: 1000.0 }
20
+
21
+ decoder: { name: identity }
22
+
23
  renderer:
24
  name: fixres_renderer_wrapper
25
  args:
 
33
  c2: 1024
34
  pe_dim: 320
35
  t_dim: 1280
36
+
37
  render_diffusion:
38
  name: fm
39
+ args: { timescale: 1000.0 }
40
+ render_sampler: { name: fm_euler_sampler }
41
  render_n_steps: 50
42
 
43
  loss_config: {}
flowae/configs/experiments/dito-XL-f8c4.yaml CHANGED
@@ -7,13 +7,13 @@ model:
7
  args:
8
  encoder:
9
  name: vqgan_encoder
10
- args: {config_name: f8c4}
11
-
12
  z_shape: [4, 32, 32]
13
  z_layernorm: true
14
-
15
- decoder: {name: identity}
16
-
17
  renderer:
18
  name: fixres_renderer_wrapper
19
  args:
@@ -27,11 +27,11 @@ model:
27
  c2: 1024
28
  pe_dim: 320
29
  t_dim: 1280
30
-
31
  render_diffusion:
32
  name: fm
33
- args: {timescale: 1000.0}
34
- render_sampler: {name: fm_euler_sampler}
35
  render_n_steps: 50
36
 
37
  loss_config: {}
 
7
  args:
8
  encoder:
9
  name: vqgan_encoder
10
+ args: { config_name: f8c4 }
11
+
12
  z_shape: [4, 32, 32]
13
  z_layernorm: true
14
+
15
+ decoder: { name: identity }
16
+
17
  renderer:
18
  name: fixres_renderer_wrapper
19
  args:
 
27
  c2: 1024
28
  pe_dim: 320
29
  t_dim: 1280
30
+
31
  render_diffusion:
32
  name: fm
33
+ args: { timescale: 1000.0 }
34
+ render_sampler: { name: fm_euler_sampler }
35
  render_n_steps: 50
36
 
37
  loss_config: {}
flowae/configs/experiments/eval50k_zdm-XL_dito-XL-f8c4-noise-sync.yaml CHANGED
@@ -11,16 +11,16 @@ model:
11
  args:
12
  zdm_force_guidance: 2.0
13
  renderer_ema_rate: 1
14
-
15
  encoder:
16
  name: vqgan_encoder
17
- args: {config_name: f8c4}
18
-
19
  z_shape: [4, 32, 32]
20
  z_layernorm: true
21
-
22
- decoder: {name: identity}
23
-
24
  renderer:
25
  name: fixres_renderer_wrapper
26
  args:
@@ -34,11 +34,11 @@ model:
34
  c2: 1024
35
  pe_dim: 320
36
  t_dim: 1280
37
-
38
  render_diffusion:
39
  name: fm
40
- args: {timescale: 1000.0}
41
- render_sampler: {name: fm_euler_sampler}
42
  render_n_steps: 50
43
 
44
  loss_config: {}
 
11
  args:
12
  zdm_force_guidance: 2.0
13
  renderer_ema_rate: 1
14
+
15
  encoder:
16
  name: vqgan_encoder
17
+ args: { config_name: f8c4 }
18
+
19
  z_shape: [4, 32, 32]
20
  z_layernorm: true
21
+
22
+ decoder: { name: identity }
23
+
24
  renderer:
25
  name: fixres_renderer_wrapper
26
  args:
 
34
  c2: 1024
35
  pe_dim: 320
36
  t_dim: 1280
37
+
38
  render_diffusion:
39
  name: fm
40
+ args: { timescale: 1000.0 }
41
+ render_sampler: { name: fm_euler_sampler }
42
  render_n_steps: 50
43
 
44
  loss_config: {}
flowae/configs/experiments/eval50k_zdm-XL_dito-XL-f8c4.yaml CHANGED
@@ -11,16 +11,16 @@ model:
11
  args:
12
  zdm_force_guidance: 2.0
13
  renderer_ema_rate: 1
14
-
15
  encoder:
16
  name: vqgan_encoder
17
- args: {config_name: f8c4}
18
-
19
  z_shape: [4, 32, 32]
20
  z_layernorm: true
21
-
22
- decoder: {name: identity}
23
-
24
  renderer:
25
  name: fixres_renderer_wrapper
26
  args:
@@ -34,11 +34,11 @@ model:
34
  c2: 1024
35
  pe_dim: 320
36
  t_dim: 1280
37
-
38
  render_diffusion:
39
  name: fm
40
- args: {timescale: 1000.0}
41
- render_sampler: {name: fm_euler_sampler}
42
  render_n_steps: 50
43
 
44
  loss_config: {}
 
11
  args:
12
  zdm_force_guidance: 2.0
13
  renderer_ema_rate: 1
14
+
15
  encoder:
16
  name: vqgan_encoder
17
+ args: { config_name: f8c4 }
18
+
19
  z_shape: [4, 32, 32]
20
  z_layernorm: true
21
+
22
+ decoder: { name: identity }
23
+
24
  renderer:
25
  name: fixres_renderer_wrapper
26
  args:
 
34
  c2: 1024
35
  pe_dim: 320
36
  t_dim: 1280
37
+
38
  render_diffusion:
39
  name: fm
40
+ args: { timescale: 1000.0 }
41
+ render_sampler: { name: fm_euler_sampler }
42
  render_n_steps: 50
43
 
44
  loss_config: {}
flowae/configs/experiments/zdm-XL_dito-XL-f8c4-noise-sync.yaml CHANGED
@@ -8,16 +8,16 @@ model:
8
  name: dito
9
  args:
10
  renderer_ema_rate: 1
11
-
12
  encoder:
13
  name: vqgan_encoder
14
- args: {config_name: f8c4}
15
-
16
  z_shape: [4, 32, 32]
17
  z_layernorm: true
18
-
19
- decoder: {name: identity}
20
-
21
  renderer:
22
  name: fixres_renderer_wrapper
23
  args:
@@ -31,11 +31,11 @@ model:
31
  c2: 1024
32
  pe_dim: 320
33
  t_dim: 1280
34
-
35
  render_diffusion:
36
  name: fm
37
- args: {timescale: 1000.0}
38
- render_sampler: {name: fm_euler_sampler}
39
  render_n_steps: 50
40
 
41
  loss_config: {}
 
8
  name: dito
9
  args:
10
  renderer_ema_rate: 1
11
+
12
  encoder:
13
  name: vqgan_encoder
14
+ args: { config_name: f8c4 }
15
+
16
  z_shape: [4, 32, 32]
17
  z_layernorm: true
18
+
19
+ decoder: { name: identity }
20
+
21
  renderer:
22
  name: fixres_renderer_wrapper
23
  args:
 
31
  c2: 1024
32
  pe_dim: 320
33
  t_dim: 1280
34
+
35
  render_diffusion:
36
  name: fm
37
+ args: { timescale: 1000.0 }
38
+ render_sampler: { name: fm_euler_sampler }
39
  render_n_steps: 50
40
 
41
  loss_config: {}
flowae/configs/experiments/zdm-XL_dito-XL-f8c4.yaml CHANGED
@@ -4,20 +4,20 @@ __base__:
4
  - configs/trainers/zdm.yaml
5
 
6
  model:
7
- load_ckpt:
8
  name: dito
9
  args:
10
  renderer_ema_rate: 1
11
-
12
  encoder:
13
  name: vqgan_encoder
14
- args: {config_name: f8c4}
15
-
16
  z_shape: [4, 32, 32]
17
  z_layernorm: true
18
-
19
- decoder: {name: identity}
20
-
21
  renderer:
22
  name: fixres_renderer_wrapper
23
  args:
@@ -31,11 +31,11 @@ model:
31
  c2: 1024
32
  pe_dim: 320
33
  t_dim: 1280
34
-
35
  render_diffusion:
36
  name: fm
37
- args: {timescale: 1000.0}
38
- render_sampler: {name: fm_euler_sampler}
39
  render_n_steps: 50
40
 
41
  loss_config: {}
 
4
  - configs/trainers/zdm.yaml
5
 
6
  model:
7
+ load_ckpt:
8
  name: dito
9
  args:
10
  renderer_ema_rate: 1
11
+
12
  encoder:
13
  name: vqgan_encoder
14
+ args: { config_name: f8c4 }
15
+
16
  z_shape: [4, 32, 32]
17
  z_layernorm: true
18
+
19
+ decoder: { name: identity }
20
+
21
  renderer:
22
  name: fixres_renderer_wrapper
23
  args:
 
31
  c2: 1024
32
  pe_dim: 320
33
  t_dim: 1280
34
+
35
  render_diffusion:
36
  name: fm
37
+ args: { timescale: 1000.0 }
38
+ render_sampler: { name: fm_euler_sampler }
39
  render_n_steps: 50
40
 
41
  loss_config: {}
flowae/configs/models/zdm-XL_imagenet.yaml CHANGED
@@ -2,11 +2,11 @@ model:
2
  args:
3
  zdm_net:
4
  name: dit_xl_2
5
- args: {n_classes: 1001}
6
  zdm_diffusion:
7
  name: fm
8
- args: {timescale: 1000.0}
9
- zdm_sampler: {name: fm_euler_sampler}
10
  zdm_n_steps: 200
11
  zdm_train_normalize: false
12
- zdm_class_cond: 1000
 
2
  args:
3
  zdm_net:
4
  name: dit_xl_2
5
+ args: { n_classes: 1001 }
6
  zdm_diffusion:
7
  name: fm
8
+ args: { timescale: 1000.0 }
9
+ zdm_sampler: { name: fm_euler_sampler }
10
  zdm_n_steps: 200
11
  zdm_train_normalize: false
12
+ zdm_class_cond: 1000
flowae/configs/trainers/dito.yaml CHANGED
@@ -11,9 +11,9 @@ vis_iter: 50000
11
  optimizers:
12
  encoder:
13
  name: adamw
14
- args: {lr: 1.e-4}
15
  renderer:
16
  name: adamw
17
- args: {lr: 1.e-4}
18
 
19
- evaluate_ae: true
 
11
  optimizers:
12
  encoder:
13
  name: adamw
14
+ args: { lr: 1.e-4 }
15
  renderer:
16
  name: adamw
17
+ args: { lr: 1.e-4 }
18
 
19
+ evaluate_ae: true
flowae/configs/trainers/glpto.yaml CHANGED
@@ -11,14 +11,14 @@ vis_iter: 50000
11
  optimizers:
12
  encoder:
13
  name: adam
14
- args: {lr: 1.e-4, betas: [0.5, 0.9]}
15
  renderer:
16
  name: adam
17
- args: {lr: 1.e-4, betas: [0.5, 0.9]}
18
  disc:
19
  name: adam
20
- args: {lr: 1.e-4, betas: [0.5, 0.9]}
21
  gan_start_after_iters: 50000
22
  find_unused_parameters: true
23
 
24
- evaluate_ae: true
 
11
  optimizers:
12
  encoder:
13
  name: adam
14
+ args: { lr: 1.e-4, betas: [0.5, 0.9] }
15
  renderer:
16
  name: adam
17
+ args: { lr: 1.e-4, betas: [0.5, 0.9] }
18
  disc:
19
  name: adam
20
+ args: { lr: 1.e-4, betas: [0.5, 0.9] }
21
  gan_start_after_iters: 50000
22
  find_unused_parameters: true
23
 
24
+ evaluate_ae: true
flowae/configs/trainers/zdm.yaml CHANGED
@@ -14,7 +14,7 @@ ckpt_select_metric:
14
  optimizers:
15
  zdm:
16
  name: adamw
17
- args: {lr: 1.e-4, weight_decay: 0.0}
18
  find_unused_parameters: true
19
 
20
- evaluate_zdm: true
 
14
  optimizers:
15
  zdm:
16
  name: adamw
17
+ args: { lr: 1.e-4, weight_decay: 0.0 }
18
  find_unused_parameters: true
19
 
20
+ evaluate_zdm: true
flowae/load/wandb.yaml CHANGED
@@ -1,3 +1,3 @@
1
- entity:
2
- api_key:
3
- project:
 
1
+ entity:
2
+ api_key:
3
+ project: