| config: conf/train_asr_hubert_transfer.yaml |
| print_config: false |
| log_level: INFO |
| dry_run: false |
| iterator_type: sequence |
| output_dir: exp/asr_hubert_official_korat_khummuang_pattani_transfer_seed100 |
| ngpu: 1 |
| seed: 100 |
| num_workers: 1 |
| num_att_plot: 0 |
| dist_backend: nccl |
| dist_init_method: env:// |
| dist_world_size: 2 |
| dist_rank: 0 |
| local_rank: 0 |
| dist_master_addr: localhost |
| dist_master_port: null |
| dist_launcher: null |
| multiprocessing_distributed: true |
| unused_parameters: true |
| sharded_ddp: false |
| cudnn_enabled: true |
| cudnn_benchmark: false |
| cudnn_deterministic: true |
| collect_stats: false |
| write_collected_feats: false |
| max_epoch: 134 |
| patience: 20 |
| val_scheduler_criterion: |
| - valid |
| - loss |
| early_stopping_criterion: |
| - valid |
| - loss |
| - min |
| best_model_criterion: |
| - - valid |
| - acc |
| - max |
| keep_nbest_models: 10 |
| nbest_averaging_interval: 0 |
| grad_clip: 5.0 |
| grad_clip_type: 2.0 |
| grad_noise: false |
| accum_grad: 2 |
| no_forward_run: false |
| resume: true |
| train_dtype: float32 |
| use_amp: false |
| log_interval: null |
| use_matplotlib: true |
| use_tensorboard: true |
| create_graph_in_tensorboard: false |
| use_wandb: false |
| wandb_project: null |
| wandb_id: null |
| wandb_entity: null |
| wandb_name: null |
| wandb_model_log_interval: -1 |
| detect_anomaly: false |
| pretrain_path: null |
| init_param: |
| - /mnt/espnet_models/asr1/official_korat_khummuang_pattani/exp/asr_hubert_scratch_seed100/20epoch.pth:encoder:encoder |
| ignore_init_mismatch: true |
| freeze_param: [] |
| num_iters_per_epoch: 1500 |
| batch_size: 20 |
| valid_batch_size: null |
| batch_bins: 9000000 |
| valid_batch_bins: null |
| train_shape_file: |
| - exp/asr_stats_raw_char_sp/train/speech_shape |
| - exp/asr_stats_raw_char_sp/train/text_shape.char |
| valid_shape_file: |
| - exp/asr_stats_raw_char_sp/valid/speech_shape |
| - exp/asr_stats_raw_char_sp/valid/text_shape.char |
| batch_type: numel |
| valid_batch_type: null |
| fold_length: |
| - 80000 |
| - 150 |
| sort_in_batch: descending |
| sort_batch: descending |
| multiple_iterator: false |
| chunk_length: 500 |
| chunk_shift_ratio: 0.5 |
| num_cache_chunks: 1024 |
| chunk_excluded_key_prefixes: [] |
| train_data_path_and_name_and_type: |
| - - dump/raw/cheat_sp/wav.scp |
| - speech |
| - sound |
| - - dump/raw/cheat_sp/text |
| - text |
| - text |
| valid_data_path_and_name_and_type: |
| - - dump/raw/test/wav.scp |
| - speech |
| - sound |
| - - dump/raw/test/text |
| - text |
| - text |
| allow_variable_data_keys: false |
| max_cache_size: 0.0 |
| max_cache_fd: 32 |
| valid_max_cache_size: null |
| exclude_weight_decay: false |
| exclude_weight_decay_conf: {} |
| optim: adam |
| optim_conf: |
| lr: 0.0002 |
| weight_decay: 1.0e-06 |
| scheduler: warmuplr |
| scheduler_conf: |
| warmup_steps: 3000 |
| token_list: |
| - <blank> |
| - <unk> |
| - <space> |
| - อ |
| - า |
| - น |
| - ม |
| - ่ |
| - ร |
| - ะ |
| - ง |
| - ด |
| - เ |
| - ห |
| - ี |
| - ้ |
| - ก |
| - ั |
| - ย |
| - บ |
| - ล |
| - แ |
| - ิ |
| - ค |
| - ว |
| - ส |
| - ต |
| - ข |
| - ป |
| - ์ |
| - ู |
| - ท |
| - ไ |
| - จ |
| - ื |
| - ุ |
| - พ |
| - ำ |
| - ช |
| - โ |
| - ๋ |
| - ซ |
| - ึ |
| - ็ |
| - ฮ |
| - ใ |
| - ๊ |
| - ญ |
| - ผ |
| - ษ |
| - ศ |
| - ฆ |
| - ณ |
| - ธ |
| - ถ |
| - ฟ |
| - ภ |
| - ฤ |
| - ฝ |
| - ฉ |
| - ฐ |
| - ฑ |
| - ฬ |
| - ฎ |
| - <sos/eos> |
| init: null |
| input_size: 1 |
| ctc_conf: |
| dropout_rate: 0.0 |
| ctc_type: builtin |
| reduce: true |
| ignore_nan_grad: null |
| zero_infinity: true |
| joint_net_conf: null |
| use_preprocessor: true |
| token_type: char |
| bpemodel: null |
| non_linguistic_symbols: null |
| cleaner: null |
| g2p: null |
| speech_volume_normalize: null |
| rir_scp: null |
| rir_apply_prob: 1.0 |
| noise_scp: null |
| noise_apply_prob: 1.0 |
| noise_db_range: '13_15' |
| short_noise_thres: 0.5 |
| aux_ctc_tasks: [] |
| frontend: null |
| frontend_conf: {} |
| specaug: null |
| specaug_conf: {} |
| normalize: utterance_mvn |
| normalize_conf: {} |
| model: espnet |
| model_conf: |
| ctc_weight: 0.3 |
| lsm_weight: 0.1 |
| length_normalized_loss: false |
| preencoder: null |
| preencoder_conf: {} |
| encoder: hubert |
| encoder_conf: |
| output_size: 256 |
| normalize_before: false |
| hubert_url: https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt |
| hubert_dir_path: ./downloads/hubert_pretrained_models/hubert_base_ls960.pt |
| freeze_finetune_updates: 3000 |
| postencoder: null |
| postencoder_conf: {} |
| decoder: transformer |
| decoder_conf: |
| attention_heads: 4 |
| linear_units: 2048 |
| num_blocks: 6 |
| dropout_rate: 0.1 |
| positional_dropout_rate: 0.1 |
| self_attention_dropout_rate: 0.1 |
| src_attention_dropout_rate: 0.1 |
| preprocessor: default |
| preprocessor_conf: {} |
| required: |
| - output_dir |
| - token_list |
| version: '202301' |
| distributed: true |
|
|