primepake committed on
Commit
f973bf5
·
1 Parent(s): 92a99c9

update new model

Browse files
dac-vae/audiotools/data/datasets.py CHANGED
@@ -54,7 +54,7 @@ class AudioLoader:
54
  self.audio_lists = util.read_sources(
55
  sources, relative_path=relative_path, ext=ext
56
  )
57
-
58
  self.audio_indices = [
59
  (src_idx, item_idx)
60
  for src_idx, src in enumerate(self.audio_lists)
 
54
  self.audio_lists = util.read_sources(
55
  sources, relative_path=relative_path, ext=ext
56
  )
57
+ print(f"Found number of audio {len(self.audio_lists)} {self.audio_lists[0]}")
58
  self.audio_indices = [
59
  (src_idx, item_idx)
60
  for src_idx, src in enumerate(self.audio_lists)
dac-vae/{base.yml → configs/base.yml} RENAMED
@@ -2,10 +2,10 @@
2
  vae:
3
  sample_rate: 24000
4
  encoder_dim: 64
5
- latent_dim: 64
6
- encoder_rates: [2, 4, 5, 8]
7
  decoder_dim: 1536
8
- decoder_rates: [8, 5, 4, 2]
9
  d_in: 1
10
  d_out: 1
11
  weight_init: xavier
 
2
  vae:
3
  sample_rate: 24000
4
  encoder_dim: 64
5
+ latent_dim: 80
6
+ encoder_rates: [2, 3, 4, 4, 5]
7
  decoder_dim: 1536
8
+ decoder_rates: [5, 4, 4, 3, 2]
9
  d_in: 1
10
  d_out: 1
11
  weight_init: xavier
dac-vae/{config.yml → configs/config.yml} RENAMED
File without changes
dac-vae/configs/configx2.yml ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model setup
2
+ vae:
3
+ sample_rate: 24000
4
+ encoder_dim: 64
5
+ latent_dim: 64
6
+ encoder_rates: [2, 4, 5, 8]
7
+ decoder_dim: 1536
8
+ decoder_rates: [8, 5, 4, 2]
9
+ d_in: 1
10
+ d_out: 1
11
+ weight_init: xavier
12
+ activation: snake
13
+ gain: 1.0
14
+
15
+ discriminator:
16
+ sample_rate: 24000
17
+ d_in: 1
18
+ rates: []
19
+ periods: [2, 3, 5, 7, 11]
20
+ fft_sizes: [2048, 1024, 512]
21
+ bands:
22
+ - [0.0, 0.1]
23
+ - [0.1, 0.25]
24
+ - [0.25, 0.5]
25
+ - [0.5, 0.75]
26
+ - [0.75, 1.0]
27
+
28
+ max_norm: 1000
29
+ max_norm_d: 10
30
+ initial_norm: 1000
31
+ initial_norm_d: 10
32
+
33
+ amp: false
34
+ batch_size: 128
35
+ val_batch_size: 4
36
+ num_workers: 0
37
+ device: cuda
38
+ num_samples: 530000
39
+ gan_start_step: 0
40
+ num_iters: 500000
41
+ save_iters: 1000
42
+ valid_freq: 1000
43
+ sample_freq: 2000
44
+ val_idx: [0, 1, 2, 3, 4, 5, 6, 7]
45
+ seed: 0
46
+ lambdas:
47
+ mel/loss: 15.0
48
+ adv/feat_loss: 2.0
49
+ adv/gen_loss: 1.0
50
+ kl/loss: 0.1
51
+ stft/loss: 0.0
52
+ waveform/loss: 0.0
53
+ logs_penalty: 0.0 #0.02
54
+ grad_penalty: 0.0 #1.0
55
+ lipschitz_penalty: 0.0 #0.001
56
+
57
+ VolumeNorm.db: [lufs, -18]
58
+
59
+ # Transforms
60
+ build_transform.preprocess:
61
+ - Identity
62
+ build_transform.augment_prob: 0.0
63
+ build_transform.augment:
64
+ - Identity
65
+ build_transform.postprocess:
66
+ - Identity
67
+ - Identity
68
+ - Identity
69
+
70
+ # Loss setup
71
+ MultiScaleSTFTLoss:
72
+ window_lengths: [1024, 2048]
73
+
74
+ MelSpectrogramLoss:
75
+ n_mels: [5, 10, 20, 40, 80, 160, 320]
76
+ window_lengths: [32, 64, 128, 256, 512, 1024, 2048]
77
+ mel_fmin: [0, 0, 0, 0, 0, 0, 0]
78
+ mel_fmax: [null, null, null, null, null, null, null]
79
+ pow: 1.0
80
+ clamp_eps: 1.0e-5
81
+ mag_weight: 0.0
82
+
83
+ # optimizer
84
+ optimizer:
85
+ type: Adamw
86
+ weight_decay: 0.001
87
+ lr: 0.0001
88
+ scheduler: linearlr # or constantlr
89
+ warmup_steps: 500
90
+
91
+ disc_optimizer:
92
+ type: Adamw
93
+ weight_decay: 0.001
94
+ lr: 0.0001
95
+ scheduler: linearlr # or constantlr
96
+ warmup_steps: 500
97
+
98
+ # Data
99
+ train:
100
+ duration: 0.38
101
+ n_examples: 10000000
102
+ without_replacement: true
103
+ shuffle_loaders: true
104
+
105
+ val:
106
+ duration: 5.0
107
+ n_examples: 100
108
+ without_replacement: true
109
+ shuffle_loaders: false
110
+
111
+ test:
112
+ duration: 10.0
113
+ n_examples: 1000
114
+ without_replacement: true
115
+ shuffle_loaders: false
116
+
117
+ train_folders:
118
+ Emilia_EN:
119
+ - /data/dataset/emilia/en/EN_B00000
120
+ - /data/dataset/vivoice
121
+
122
+ val_folders:
123
+ Emilia_EN:
124
+ - /data/dataset/vivoice
125
+
126
+ test_folders:
127
+ Emilia_EN:
128
+ - /data/dataset/vivoice
dac-vae/train.py CHANGED
@@ -112,13 +112,13 @@ def prepare_dataloader(
112
  shuffle: bool = True,
113
  **kwargs,
114
  ):
115
- # sampler = ResumableDistributedSampler(
116
- # dataset,
117
- # start_idx,
118
- # num_replicas=world_size,
119
- # rank=local_rank,
120
- # shuffle=shuffle,
121
- # )
122
 
123
  sampler = None
124
  if start_idx > 0:
@@ -126,10 +126,10 @@ def prepare_dataloader(
126
  indices = list(range(start_idx, len(dataset))) + list(range(start_idx))
127
  sampler = torch.utils.data.SubsetRandomSampler(indices)
128
 
129
- # if "num_workers" in kwargs:
130
- # kwargs["num_workers"] = max(kwargs["num_workers"] // world_size, 1)
131
- # kwargs["batch_size"] = max(kwargs["batch_size"] // world_size, 1)
132
- # dataloader = torch.utils.data.DataLoader(dataset, sampler=sampler, **kwargs)
133
  dataloader = torch.utils.data.DataLoader(
134
  dataset,
135
  sampler=sampler,
@@ -978,7 +978,7 @@ if __name__ == "__main__":
978
  parser.add_argument(
979
  "--config_path",
980
  type=str,
981
- default="config.yml",
982
  help="Path to config YAML",
983
  )
984
  parser.add_argument("--run_id", type=str, required=True, help="Run ID for wandb")
 
112
  shuffle: bool = True,
113
  **kwargs,
114
  ):
115
+ sampler = ResumableDistributedSampler(
116
+ dataset,
117
+ start_idx,
118
+ num_replicas=world_size,
119
+ rank=local_rank,
120
+ shuffle=shuffle,
121
+ )
122
 
123
  sampler = None
124
  if start_idx > 0:
 
126
  indices = list(range(start_idx, len(dataset))) + list(range(start_idx))
127
  sampler = torch.utils.data.SubsetRandomSampler(indices)
128
 
129
+ if "num_workers" in kwargs:
130
+ kwargs["num_workers"] = max(kwargs["num_workers"] // world_size, 1)
131
+ kwargs["batch_size"] = max(kwargs["batch_size"] // world_size, 1)
132
+ dataloader = torch.utils.data.DataLoader(dataset, sampler=sampler, **kwargs)
133
  dataloader = torch.utils.data.DataLoader(
134
  dataset,
135
  sampler=sampler,
 
978
  parser.add_argument(
979
  "--config_path",
980
  type=str,
981
+ default="configs/configx2.yml",
982
  help="Path to config YAML",
983
  )
984
  parser.add_argument("--run_id", type=str, required=True, help="Run ID for wandb")