# ESPnet2 training configuration (dumped config.yaml, XEUS SSL pre-training).
# NOTE(review): the source file had lost all of its newlines (the whole
# document was collapsed onto four physical lines, which is not parseable
# YAML). This is a structure-faithful reconstruction of the same keys and
# values in block style; verify against the original dump if available.
config: conf/tuning/train_xeus.yaml
print_config: false
log_level: INFO
drop_last_iter: false
dry_run: false
iterator_type: sequence
valid_iterator_type: null
output_dir: exp/ssl_train_xeus_raw
ngpu: 1
seed: 1
num_workers: 4
num_att_plot: 0
# -- distributed-training settings --
dist_backend: nccl
dist_init_method: env://
dist_world_size: null
dist_rank: null
local_rank: 0
dist_master_addr: null
dist_master_port: null
dist_launcher: null
multiprocessing_distributed: false
unused_parameters: false
sharded_ddp: false
use_deepspeed: false
deepspeed_config: null
gradient_as_bucket_view: true
ddp_comm_hook: null
# -- cuDNN / numeric settings --
cudnn_enabled: true
cudnn_benchmark: false
cudnn_deterministic: true
use_tf32: false
collect_stats: false
write_collected_feats: false
# -- training loop / checkpointing --
max_epoch: 1
patience: null
val_scheduler_criterion:
- valid
- loss
early_stopping_criterion:
- valid
- loss
- min
best_model_criterion:
- - valid
  - loss
  - min
keep_nbest_models: 5
nbest_averaging_interval: 0
grad_clip: 10.0
grad_clip_type: 2.0
grad_noise: false
accum_grad: 1
no_forward_run: false
resume: true
train_dtype: float32
use_amp: true
log_interval: null
use_matplotlib: true
use_tensorboard: false
create_graph_in_tensorboard: false
# -- experiment tracking --
use_wandb: false
wandb_project: null
wandb_id: null
wandb_entity: null
wandb_name: null
wandb_model_log_interval: -1
detect_anomaly: false
# -- adapters / parameter init --
use_adapter: false
adapter: lora
save_strategy: all
adapter_conf: {}
pretrain_path: null
init_param: []
ignore_init_mismatch: false
freeze_param: []
# -- batching --
num_iters_per_epoch: 10
batch_size: 20
valid_batch_size: null
batch_bins: 1600000
valid_batch_bins: null
category_sample_size: 10
train_shape_file:
- exp/ssl_stats_raw/train/speech_shape
valid_shape_file:
- exp/ssl_stats_raw/valid/speech_shape
batch_type: numel
valid_batch_type: null
fold_length:
- 80000
- 400
sort_in_batch: descending
shuffle_within_batch: false
sort_batch: descending
multiple_iterator: false
# -- chunk iterator options --
chunk_length: 500
chunk_shift_ratio: 0.5
num_cache_chunks: 1024
chunk_excluded_key_prefixes: []
chunk_default_fs: null
chunk_max_abs_length: null
chunk_discard_short_samples: true
# -- datasets: each entry is (path, data name, data type) --
train_data_path_and_name_and_type:
- - dump/raw/train_960/wav.scp
  - speech
  - sound
- - dump/raw/train_960/text
  - text
  - text
valid_data_path_and_name_and_type:
- - dump/raw/dev/wav.scp
  - speech
  - sound
- - dump/raw/dev/text
  - text
  - text
multi_task_dataset: false
allow_variable_data_keys: false
max_cache_size: 0.0
max_cache_fd: 32
allow_multi_rates: false
valid_max_cache_size: null
# -- optimization --
exclude_weight_decay: false
exclude_weight_decay_conf: {}
optim: adam
optim_conf:
  lr: 0.0003
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 32000
# -- vocabulary: 500 pseudo-label cluster ids ('0'..'499'), original order --
token_list:
- '30'
- '4'
- '72'
- '305'
- '275'
- '24'
- '369'
- '125'
- '202'
- '368'
- '270'
- '296'
- '68'
- '188'
- '418'
- '223'
- '8'
- '338'
- '437'
- '14'
- '299'
- '469'
- '415'
- '11'
- '41'
- '227'
- '44'
- '35'
- '179'
- '449'
- '23'
- '10'
- '416'
- '291'
- '100'
- '74'
- '327'
- '107'
- '321'
- '208'
- '76'
- '267'
- '130'
- '173'
- '96'
- '162'
- '456'
- '84'
- '98'
- '217'
- '48'
- '482'
- '127'
- '110'
- '366'
- '336'
- '387'
- '105'
- '373'
- '139'
- '61'
- '370'
- '464'
- '397'
- '281'
- '151'
- '154'
- '155'
- '203'
- '440'
- '119'
- '71'
- '320'
- '93'
- '20'
- '138'
- '78'
- '216'
- '104'
- '205'
- '38'
- '382'
- '238'
- '474'
- '225'
- '465'
- '309'
- '17'
- '285'
- '90'
- '375'
- '356'
- '256'
- '392'
- '311'
- '398'
- '9'
- '264'
- '341'
- '168'
- '339'
- '40'
- '344'
- '422'
- '63'
- '396'
- '51'
- '184'
- '441'
- '346'
- '252'
- '206'
- '322'
- '444'
- '198'
- '66'
- '269'
- '145'
- '69'
- '244'
- '463'
- '37'
- '172'
- '271'
- '313'
- '279'
- '106'
- '377'
- '158'
- '5'
- '445'
- '455'
- '134'
- '287'
- '7'
- '297'
- '420'
- '13'
- '31'
- '484'
- '91'
- '34'
- '488'
- '468'
- '21'
- '193'
- '288'
- '159'
- '247'
- '476'
- '25'
- '265'
- '115'
- '50'
- '394'
- '197'
- '116'
- '57'
- '182'
- '378'
- '135'
- '89'
- '167'
- '19'
- '148'
- '425'
- '103'
- '95'
- '454'
- '376'
- '178'
- '79'
- '424'
- '261'
- '36'
- '426'
- '152'
- '102'
- '292'
- '258'
- '60'
- '328'
- '280'
- '273'
- '111'
- '240'
- '213'
- '483'
- '300'
- '363'
- '174'
- '317'
- '419'
- '439'
- '42'
- '118'
- '222'
- '15'
- '276'
- '277'
- '166'
- '304'
- '114'
- '329'
- '395'
- '413'
- '435'
- '33'
- '266'
- '133'
- '210'
- '408'
- '330'
- '315'
- '251'
- '6'
- '357'
- '171'
- '56'
- '1'
- '59'
- '359'
- '28'
- '215'
- '97'
- '274'
- '170'
- '49'
- '81'
- '108'
- '282'
- '85'
- '200'
- '80'
- '243'
- '364'
- '113'
- '176'
- '433'
- '77'
- '335'
- '231'
- '462'
- '62'
- '286'
- '67'
- '191'
- '228'
- '16'
- '22'
- '122'
- '235'
- '331'
- '137'
- '289'
- '92'
- '157'
- '417'
- '319'
- '2'
- '101'
- '129'
- '169'
- '26'
- '165'
- '143'
- '229'
- '220'
- '324'
- '393'
- '272'
- '43'
- '367'
- '204'
- '410'
- '278'
- '73'
- '65'
- '428'
- '411'
- '380'
- '99'
- '83'
- '412'
- '307'
- '306'
- '201'
- '361'
- '232'
- '290'
- '109'
- '140'
- '438'
- '64'
- '447'
- '374'
- '301'
- '249'
- '186'
- '234'
- '121'
- '239'
- '255'
- '82'
- '384'
- '160'
- '494'
- '351'
- '283'
- '32'
- '54'
- '52'
- '187'
- '337'
- '112'
- '260'
- '132'
- '47'
- '457'
- '211'
- '490'
- '430'
- '423'
- '175'
- '142'
- '499'
- '407'
- '303'
- '12'
- '403'
- '209'
- '233'
- '262'
- '146'
- '436'
- '219'
- '316'
- '123'
- '460'
- '39'
- '58'
- '333'
- '475'
- '70'
- '218'
- '199'
- '295'
- '389'
- '345'
- '156'
- '383'
- '390'
- '192'
- '343'
- '150'
- '318'
- '196'
- '94'
- '194'
- '27'
- '459'
- '257'
- '371'
- '498'
- '485'
- '190'
- '402'
- '163'
- '491'
- '0'
- '241'
- '467'
- '149'
- '18'
- '429'
- '421'
- '189'
- '365'
- '3'
- '75'
- '141'
- '259'
- '120'
- '372'
- '405'
- '354'
- '446'
- '340'
- '406'
- '353'
- '53'
- '334'
- '427'
- '432'
- '442'
- '131'
- '88'
- '470'
- '473'
- '254'
- '349'
- '214'
- '153'
- '342'
- '212'
- '434'
- '46'
- '86'
- '350'
- '284'
- '308'
- '323'
- '381'
- '161'
- '391'
- '248'
- '180'
- '230'
- '452'
- '325'
- '246'
- '224'
- '347'
- '195'
- '128'
- '55'
- '314'
- '126'
- '147'
- '481'
- '185'
- '358'
- '478'
- '400'
- '495'
- '388'
- '177'
- '181'
- '466'
- '362'
- '268'
- '326'
- '144'
- '493'
- '489'
- '450'
- '399'
- '443'
- '253'
- '236'
- '117'
- '448'
- '312'
- '379'
- '492'
- '496'
- '87'
- '332'
- '298'
- '497'
- '221'
- '480'
- '226'
- '302'
- '348'
- '136'
- '451'
- '479'
- '183'
- '45'
- '404'
- '263'
- '477'
- '355'
- '29'
- '414'
- '237'
- '409'
- '385'
- '461'
- '386'
- '124'
- '401'
- '352'
- '293'
- '471'
- '458'
- '472'
- '486'
- '164'
- '453'
- '310'
- '207'
- '487'
- '294'
- '360'
- '245'
- '242'
- '431'
- '250'
# NOTE(review): the collapsed source contained a stray bare "- -" between the
# last token ('250') and "init:". The 500 tokens above already form the
# complete '0'..'499' vocabulary, so the stray dashes were dropped as an
# extraction artifact — confirm against the original config.
init: xavier_uniform
collate_fn_conf:
  label_downsampling: 1
  pad: false
  rand_crop: true
  crop_audio: true
input_size: null
num_classes: null
# -- preprocessing --
use_preprocessor: true
token_type: word
bpemodel: null
non_linguistic_symbols: null
cleaner: null
g2p: null
speech_volume_normalize: null
rir_scp: null
rir_apply_prob: 1.0
noise_scp: null
noise_apply_prob: 1.0
noise_db_range: '13_15'
window_size: null
window_shift: null
# -- SSL objective --
loss:
- name: hubert
  conf:
    num_classes: 2048
    final_dim: 768
util:
- name: mask
  conf: {}
# -- model components --
frontend: wav2vec_cnn
frontend_conf:
  norm_mode: layer_norm
  conv_mode: standard
  bias: true
  normalize_audio: true
  normalize_output: false
  # NOTE(review): "fs: 16k" followed the frontend options in the collapsed
  # source; indentation was lost, so placement inside frontend_conf is
  # inferred — confirm against the original dump.
  fs: 16k
specaug: null
specaug_conf: {}
normalize: null
normalize_conf: {}
preencoder: linear
preencoder_conf:
  output_size: 1024
encoder: e_branchformer
encoder_conf:
  output_size: 1024
  attention_heads: 8
  attention_layer_type: selfattn
  pos_enc_layer_type: conv
  rel_pos_type: latest
  cgmlp_linear_units: 4096
  cgmlp_conv_kernel: 31
  use_linear_after_conv: false
  gate_activation: identity
  num_blocks: 19
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  input_layer: null
  layer_drop_rate: 0.0
  linear_units: 4096
  positionwise_layer_type: linear
  macaron_ffn: true
  use_ffn: true
  merge_conv_kernel: 31
model: espnet
model_conf: {}
required:
- output_dir
- token_list
version: '202412'
distributed: false