| config: conf/tuning/BSCodec_band_simvq_3band.yaml |
| print_config: false |
| log_level: INFO |
| drop_last_iter: false |
| dry_run: false |
| iterator_type: chunk |
| valid_iterator_type: null |
| output_dir: exp/codec_BSCodec_band_simvq_3band_raw_fs24000 |
| ngpu: 1 |
| seed: 777 |
| num_workers: 1 |
| num_att_plot: 0 |
| dist_backend: nccl |
| dist_init_method: env:// |
| dist_world_size: 2 |
| dist_rank: 0 |
| local_rank: 0 |
| dist_master_addr: localhost |
| dist_master_port: 55173 |
| dist_launcher: null |
| multiprocessing_distributed: true |
| unused_parameters: true |
| sharded_ddp: false |
| use_deepspeed: false |
| deepspeed_config: null |
| gradient_as_bucket_view: true |
| ddp_comm_hook: null |
| cudnn_enabled: true |
| cudnn_benchmark: false |
| cudnn_deterministic: false |
| use_tf32: false |
| collect_stats: false |
| write_collected_feats: false |
| max_epoch: 1200 |
| patience: null |
| val_scheduler_criterion: |
| - valid |
| - loss |
| early_stopping_criterion: |
| - valid |
| - loss |
| - min |
| best_model_criterion: |
| - - valid |
| - mel_loss |
| - min |
| - - train |
| - mel_loss |
| - min |
| - - train |
| - total_count |
| - max |
| keep_nbest_models: 5 |
| nbest_averaging_interval: 0 |
| grad_clip: -1 |
| grad_clip_type: 2.0 |
| grad_noise: false |
| accum_grad: 1 |
| no_forward_run: false |
| resume: true |
| train_dtype: float32 |
| use_amp: false |
| log_interval: 50 |
| use_matplotlib: true |
| use_tensorboard: true |
| create_graph_in_tensorboard: false |
| use_wandb: false |
| wandb_project: null |
| wandb_id: null |
| wandb_entity: null |
| wandb_name: null |
| wandb_model_log_interval: -1 |
| detect_anomaly: false |
| use_adapter: false |
| adapter: lora |
| save_strategy: all |
| adapter_conf: {} |
| pretrain_path: null |
| init_param: [] |
| ignore_init_mismatch: false |
| freeze_param: [] |
| num_iters_per_epoch: 2000 |
| batch_size: 72 |
| valid_batch_size: null |
| batch_bins: 1000000 |
| valid_batch_bins: null |
| category_sample_size: 10 |
| train_shape_file: |
| - exp/codec_stats_raw/train/audio_shape |
| valid_shape_file: |
| - exp/codec_stats_raw/valid/audio_shape |
| batch_type: unsorted |
| valid_batch_type: null |
| fold_length: |
| - 256000 |
| sort_in_batch: descending |
| shuffle_within_batch: false |
| sort_batch: descending |
| multiple_iterator: false |
| chunk_length: 24000 |
| chunk_shift_ratio: 0.5 |
| num_cache_chunks: 16 |
| chunk_excluded_key_prefixes: [] |
| chunk_default_fs: null |
| chunk_max_abs_length: null |
| chunk_discard_short_samples: true |
| train_data_path_and_name_and_type: |
| - - dump/raw/train_total/wav.scp |
| - audio |
| - sound |
| valid_data_path_and_name_and_type: |
| - - dump/raw/dev_small/wav.scp |
| - audio |
| - sound |
| multi_task_dataset: false |
| allow_variable_data_keys: false |
| max_cache_size: 0.0 |
| max_cache_fd: 32 |
| allow_multi_rates: false |
| valid_max_cache_size: null |
| exclude_weight_decay: false |
| exclude_weight_decay_conf: {} |
| optim: adamw |
| optim_conf: |
| lr: 0.0002 |
| betas: |
| - 0.5 |
| - 0.9 |
| eps: 1.0e-09 |
| weight_decay: 0.0 |
| scheduler: exponentiallr |
| scheduler_conf: |
| gamma: 0.999875 |
| optim2: adamw |
| optim2_conf: |
| lr: 0.0002 |
| betas: |
| - 0.5 |
| - 0.9 |
| eps: 1.0e-09 |
| weight_decay: 0.0 |
| scheduler2: exponentiallr |
| scheduler2_conf: |
| gamma: 0.999875 |
| generator_first: true |
| skip_discriminator_prob: 0.0 |
| model_conf: {} |
| use_preprocessor: true |
| codec: bscodec |
| codec_conf: |
| sampling_rate: 24000 |
| generator_params: |
| hidden_dim: 512 |
| bands: |
| - - 0 |
| - 2000 |
| - - 2000 |
| - 4000 |
| - - 4000 |
| - 12000 |
| encdec_channels: 1 |
| encdec_n_filters: 32 |
| encdec_n_residual_layers: 3 |
| encdec_ratios: |
| - 8 |
| - 5 |
| - 4 |
| - 2 |
| encdec_activation: Snake |
| encdec_norm: weight_norm |
| encdec_kernel_size: 7 |
| encdec_residual_kernel_size: 7 |
| encdec_last_kernel_size: 7 |
| encdec_dilation_base: 2 |
| encdec_causal: false |
| encdec_pad_mode: reflect |
| encdec_true_skip: false |
| encdec_compress: 2 |
| encdec_lstm: 2 |
| quantize_choice: band_simvq |
| quantize_codebook_size: 131072 |
| quantize_codebook_dim: 128 |
| decoder_trim_right_ratio: 1.0 |
| decoder_final_activation: null |
| decoder_final_activation_params: null |
| sample_rate: 24000 |
| discriminator_params: |
| msmpmb_discriminator_params: |
| rates: [] |
| sample_rate: 24000 |
| fft_sizes: |
| - 2048 |
| - 1024 |
| - 512 |
| periods: |
| - 2 |
| - 3 |
| - 5 |
| - 7 |
| - 11 |
| period_discriminator_params: |
| in_channels: 1 |
| out_channels: 1 |
| kernel_sizes: |
| - 5 |
| - 3 |
| channels: 32 |
| downsample_scales: |
| - 3 |
| - 3 |
| - 3 |
| - 3 |
| - 1 |
| max_downsample_channels: 1024 |
| bias: true |
| nonlinear_activation: LeakyReLU |
| nonlinear_activation_params: |
| negative_slope: 0.1 |
| use_weight_norm: true |
| use_spectral_norm: false |
| band_discriminator_params: |
| hop_factor: 0.25 |
| sample_rate: 24000 |
| bands: |
| - - 0.0 |
| - 0.1 |
| - - 0.1 |
| - 0.25 |
| - - 0.25 |
| - 0.5 |
| - - 0.5 |
| - 0.75 |
| - - 0.75 |
| - 1.0 |
| channel: 32 |
| generator_adv_loss_params: |
| average_by_discriminators: false |
| loss_type: mse |
| discriminator_adv_loss_params: |
| average_by_discriminators: false |
| loss_type: mse |
| use_feat_match_loss: true |
| feat_match_loss_params: |
| average_by_discriminators: false |
| average_by_layers: false |
| include_final_outputs: true |
| use_mel_loss: true |
| mel_loss_params: |
| range_start: 6 |
| range_end: 11 |
| window: hann |
| n_mels: 80 |
| fmin: 0 |
| fmax: null |
| log_base: null |
| fs: 24000 |
| use_dual_decoder: true |
| lambda_quantization: 1.0 |
| lambda_reconstruct: 1.0 |
| lambda_adv: 1.0 |
| lambda_mel: 45.0 |
| lambda_feat_match: 2.0 |
| cache_generator_outputs: true |
| required: |
| - output_dir |
| version: '202503' |
| distributed: true |
|
|