File size: 962 Bytes
95833e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Training hyperparameters.
train:
  batch_size: 8
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve `2e-4` as a string, while `2.0e-4` is a float under both
  # YAML 1.1 and YAML 1.2 resolvers.
  lr: 2.0e-4
  weight_decay: 0.0
  num_workers: 0
  gpus: 1  # ddp
  # Decimal point added for the same reason as `lr`.
  opt_eps: 1.0e-9
  beta1: 0.9
  beta2: 0.99

# Dataset locations and split settings.
data:
  timestamp_path: 'vctk-silence-labels/vctk-silences.0.92.txt'
  # NOTE(review): base_dir is a POSIX absolute path while dir is a Windows
  # drive path; both look machine-specific — confirm before running elsewhere.
  base_dir: '/DATA1/VCTK-0.92/wav48_silence_trimmed/'
  dir: 'D:/Code/StyleTTS2/acoustic_rec_data'
  format: '*.wav'
  # train/val/test. This loads as a string, not a sequence; quoted to make
  # that explicit. NOTE(review): presumably converted to numbers by the
  # consuming code — confirm.
  cv_ratio: '(100./108., 8./108., 0.00)'

# Audio settings.
audio:
  filter_length: 1024
  hop_length: 256
  win_length: 1024
  sampling_rate: 48000
  sr_min: 6000
  sr_max: 48000
  # 32 * 1024 = 32768. NOTE(review): if this is a sample count at
  # sampling_rate 48000 it is ~0.68 s rather than ~1 s — TODO confirm units.
  length: 32768

# Architecture settings (layer count, channel counts, embedding dimension).
arch:
  residual_layers: 15
  residual_channels: 64
  pos_emb_dim: 512
  bsft_channels: 64

# Lower and upper bounds for logsnr.
logsnr:
  logsnr_min: -20.0
  logsnr_max: 20.0

# dpm settings: step counts, positional-embedding parameters and the
# inference schedule.
dpm:
  max_step: 1000
  pos_emb_scale: 50000
  pos_emb_channels: 128
  infer_step: 8
  # Stored as a string, not a YAML sequence; it lists 8 values, matching
  # infer_step above. NOTE(review): presumably evaluated as Python by the
  # consuming code — confirm, and only load this file from a trusted source.
  infer_schedule: 'torch.tensor([-2.6, -0.8, 2.0, 6.4, 9.8, 12.9, 14.4, 17.2])'

# Run name and output directories (relative paths) for checkpoints,
# TensorBoard data and test results.
log:
  name: 'nuwave2'
  checkpoint_dir: 'checkpoint'
  tensorboard_dir: 'tensorboard'
  test_result_dir: 'test_sample/result'