---
# Training configuration for the "MQGAN" project.
#
# NOTE(review): this file was recovered from a copy whose indentation had been
# stripped and whose lines carried trailing "|" artifacts. The nesting below
# is reconstructed from the key grouping (repeated keys such as
# `kernel_sizes` can only be valid when nested under separate parents).
# Confirm the structure against the code that loads this file.

project_name: "MQGAN"

# Dataset location, output location and data-loading parameters.
data:
  data_dir: '../hifispeech4_CORS'
  output_dir: 'logs/mqgan_speech4_varcrop_newd'
  validation_split: 0.02
  # Three crop lengths are listed; presumably one is chosen per batch
  # ("varcrop" in output_dir) - TODO confirm with the data loader.
  crop_len: [256, 192, 128]
  batch_size: 16
  num_workers: 0

# Network hyperparameters.
model:
  mel_channels: 128

  generator:
    # `channels` and `kernel_sizes` both have 4 entries.
    channels: [512, 512, 512, 768]
    kernel_sizes: [3, 3, 5, 7]
    dropout: 0.1
    # NOTE(review): if these are per-dimension quantization level counts,
    # they give 8 * 5 * 5 * 5 = 1000 combinations - confirm.
    fsq_levels: [8, 5, 5, 5]
    refiner_base_channels: 64
    refiner_depth: 3

  discriminator_patch:
    # NOTE(review): `hidden_channels` has 5 entries while `kernel_sizes` and
    # `strides` have 6 - presumably the model adds one extra (output) layer;
    # confirm this mismatch is intentional.
    hidden_channels: [256, 256, 384, 512, 512]
    kernel_sizes: [5, 5, 5, 3, 3, 3]
    strides: [[1, 2], [2, 2], [2, 2], [2, 1], [2, 1], [2, 1]]

  discriminator_multibin:
    # NOTE(review): 5 `hidden_channels` entries vs. 6 `kernel_sizes` entries,
    # same pattern as discriminator_patch - confirm.
    hidden_channels: [128, 128, 256, 256, 384]
    kernel_sizes: [7, 5, 3, 3, 3, 3]
    n_bins: 8
    n_no_strides: 2

# Optimisation schedule and loss weighting.
training:
  num_epochs: 1000
  lr: 0.0001
  beta1: 0.9
  beta2: 0.999
  # Discriminator-specific optimizer settings (presumably `lr_d_factor`
  # scales `lr` for the discriminator - TODO confirm).
  lr_d_factor: 1.15
  d_beta1: 0.5
  d_beta2: 0.999
  warmup_steps: 1000
  discriminator_train_start_epoch: 10

  loss_weights:
    fm_lambda: 0.25
    # Key casing differs from its siblings; kept as-is because the consumer
    # looks it up by this exact name.
    Gloss_lambda: 15.0
    recon_lambda: 15.0

  # NOTE(review): the keys below are assumed to belong to `training`, not to
  # `loss_weights` - verify against the loader.
  use_fm_loss: false
  seed: 42
  no_cuda: false
  pretrained: null

# Evaluation, checkpointing and experiment-tracking settings.
logging:
  eval_interval: 2
  save_interval: 2
  num_plot_examples: 10

  wandb:
    entity: null
    project: "MQGAN"