espnet
/

geolid_combined_shared_trainable

ESPnet

TensorBoard

audio

language-identification

Model card · Files and versions

xet

Training metrics · Community

qingzhengwang committed on Aug 19, 2025

Commit

03f2e41

1 Parent(s): d70766b

Fix readme

Browse files

Files changed (1) hide show

README.md +21 -25

README.md CHANGED Viewed

@@ -113,14 +113,14 @@ The training utilized a combined dataset, merging five domain-specific corpora,
 <details><summary>expand</summary>
 ```
-config: /work/nvme/bbjs/qwang20/espnet/egs2/lid_delta/lid1/conf/mms_1b_all_no_filter_balanced_dataset/mms_ecapa_upcon_32_44_it0.4_sharedCondProj_butUpdate_50k_lr1e-5_datasetup0.3_backup_33epoch.yaml
 print_config: false
 log_level: INFO
 drop_last_iter: false
 dry_run: false
 iterator_type: category
 valid_iterator_type: category
-output_dir: exp_all_no_filter_raw/spk_mms_ecapa_upcon_32_44_it0.4_sharedCondProj_butUpdate_50k_lr1e-5_datasetup0.3_backup_33epoch_raw
 ngpu: 1
 seed: 3702
 num_workers: 8
@@ -138,6 +138,8 @@ unused_parameters: true
 sharded_ddp: false
 use_deepspeed: false
 deepspeed_config: null
 cudnn_enabled: true
 cudnn_benchmark: true
 cudnn_deterministic: false
@@ -171,10 +173,10 @@ log_interval: 100
 use_matplotlib: true
 use_tensorboard: true
 create_graph_in_tensorboard: false
-use_wandb: true
-wandb_project: lid
 wandb_id: null
-wandb_entity: qingzhew-carnegie-mellon-university
 wandb_name: null
 wandb_model_log_interval: -1
 detect_anomaly: false
@@ -192,16 +194,18 @@ valid_batch_size: null
 batch_bins: 1440000
 valid_batch_bins: null
 category_sample_size: 10
-train_shape_file:
-- exp_all_no_filter_raw/spk_stats_16k/train/speech_shape
-valid_shape_file:
-- exp_all_no_filter_raw/spk_stats_16k/valid/speech_shape
-batch_type: catpow_balance_dataset
 upsampling_factor: 0.5
-language_upsampling_factor: 0.5
 dataset_upsampling_factor: 0.3
 dataset_scaling_factor: 1.2
 max_batch_size: 6
 valid_batch_type: null
 fold_length:
 - 120000
@@ -220,14 +224,14 @@ train_data_path_and_name_and_type:
 -   - dump/raw/train_all_no_filter_lang/wav.scp
     - speech
     - sound
--   - dump/raw/train_all_no_filter_lang/utt2spk
     - lid_labels
     - text
 valid_data_path_and_name_and_type:
 -   - dump/raw/dev_ml_superb2_lang/wav.scp
     - speech
     - sound
--   - dump/raw/dev_ml_superb2_lang/utt2spk
     - lid_labels
     - text
 multi_task_dataset: false
@@ -256,8 +260,8 @@ init: null
 use_preprocessor: true
 input_size: null
 target_duration: 3.0
-spk2utt: dump/raw/train_all_no_filter_lang/spk2utt
-spk_num: 157
 sample_rate: 16000
 num_eval: 10
 rir_scp: ''
@@ -268,19 +272,11 @@ model_conf:
     - 36
     - 40
     - 44
-    lid_conditioning_layers: []
-    frozen_ecapa: false
     apply_intermediate_lang2vec_loss: true
-    apply_intermediate_lid_class_loss: false
     apply_intermediate_lang2vec_condition: true
-    apply_intermediate_lid_class_condition: false
     inter_lang2vec_loss_weight: 0.4
-    inter_lid_class_loss_weight: 0.0
     cutoff_gradient_from_backbone: false
-    cutoff_gradient_before_condtrans: true
-    independent_module: true
-    use_gate: false
-    gate_type: null
     shared_conditioning_proj: true
 frontend: s3prl_condition
 frontend_conf:
@@ -350,7 +346,7 @@ loss_conf:
     lang2vec_weight: 0.2
 required:
 - output_dir
-version: '202412'
 distributed: false
 ```

 <details><summary>expand</summary>
 ```
+config: conf/combined/mms_ecapa_upcon_32_44_it0.4_shared_trainable_dev.yaml
 print_config: false
 log_level: INFO
 drop_last_iter: false
 dry_run: false
 iterator_type: category
 valid_iterator_type: category
+output_dir: exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_dev_raw
 ngpu: 1
 seed: 3702
 num_workers: 8
 sharded_ddp: false
 use_deepspeed: false
 deepspeed_config: null
+gradient_as_bucket_view: true
+ddp_comm_hook: null
 cudnn_enabled: true
 cudnn_benchmark: true
 cudnn_deterministic: false
 use_matplotlib: true
 use_tensorboard: true
 create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
 wandb_id: null
+wandb_entity: null
 wandb_name: null
 wandb_model_log_interval: -1
 detect_anomaly: false
 batch_bins: 1440000
 valid_batch_bins: null
 category_sample_size: 10
 upsampling_factor: 0.5
+category_upsampling_factor: 0.5
 dataset_upsampling_factor: 0.3
 dataset_scaling_factor: 1.2
 max_batch_size: 6
+min_batch_size: 1
+train_shape_file:
+- exp_combined/lid_stats_16k/train/speech_shape
+valid_shape_file:
+- exp_combined/lid_stats_16k/valid/speech_shape
+batch_type: catpow_balance_dataset
+language_upsampling_factor: 0.5
 valid_batch_type: null
 fold_length:
 - 120000
 -   - dump/raw/train_all_no_filter_lang/wav.scp
     - speech
     - sound
+-   - dump/raw/train_all_no_filter_lang/utt2lang
     - lid_labels
     - text
 valid_data_path_and_name_and_type:
 -   - dump/raw/dev_ml_superb2_lang/wav.scp
     - speech
     - sound
+-   - dump/raw/dev_ml_superb2_lang/utt2lang
     - lid_labels
     - text
 multi_task_dataset: false
 use_preprocessor: true
 input_size: null
 target_duration: 3.0
+lang2utt: dump/raw/train_all_no_filter_lang/lang2utt
+lang_num: 157
 sample_rate: 16000
 num_eval: 10
 rir_scp: ''
     - 36
     - 40
     - 44
     apply_intermediate_lang2vec_loss: true
     apply_intermediate_lang2vec_condition: true
     inter_lang2vec_loss_weight: 0.4
     cutoff_gradient_from_backbone: false
+    cutoff_gradient_before_condproj: true
     shared_conditioning_proj: true
 frontend: s3prl_condition
 frontend_conf:
     lang2vec_weight: 0.2
 required:
 - output_dir
+version: '202506'
 distributed: false
 ```