iliasslasri
/

robust_speech_quantizer

Automatic Speech Recognition

Model card Files Files and versions

Metrics Training metrics Community

Iliass Lasri committed on Mar 25

Commit

7af5906

·

1 Parent(s): c3827e3

add example config

Files changed (1) hide show

config.yaml +74 -0

config.yaml ADDED Viewed

	@@ -0,0 +1,74 @@

+training:  # run naming, schedule length, learning rate and LR-scheduler settings
+  run_name: example_config
+  epochs: 150  # also reused as lr_scheduler.T_max below
+  learning_rate: 0.0001
+  log_interval: 100  # NOTE(review): unit (steps vs. batches) is not visible here — confirm
+  checkpoint_dir: null  # explicitly unset in this example
+  resume_from: null  # explicitly unset in this example
+  n_iterative_pseudolabeling: 3
+  lr_scheduler:
+    _target_: torch.optim.lr_scheduler.CosineAnnealingLR  # dotted class path; presumably instantiated Hydra-style — confirm
+    T_max: ${training.epochs}  # interpolation of training.epochs (150); needs an interpolation-aware loader
+    eta_min: 1.0e-06  # written with decimal point and signed exponent so it reads as a float
+  lr_scheduler_start_epoch: -1  # NOTE(review): meaning of -1 is not visible in this config — confirm
+dataset:  # data locations, loader settings and the augmentation set
+  root: data/LibriSpeech  # relative path
+  train_split: train-clean-100
+  test_split: test-clean
+  batch_size: 32
+  num_workers: 1
+  noise_dir: noise_fullband  # relative path; NOTE(review): base directory is not visible here — confirm
+  max_audio_length: 160000  # presumably in samples (10 s at 16 kHz) — TODO confirm
+  augmentations:
+    max_augs: 4  # all of our experiments used 4
+    time_stretch: true
+    pitch_shift: true
+    reverberation: true
+    noise: true
+    rir_dir: data/rirs  # relative path
+    activate_extra_augs: true  # NOTE(review): presumably gates the per-augmentation blocks below — confirm
+    echo:
+      enabled: true
+      volume_range:  # two-element list; presumably [min, max] — confirm
+      - 0.1
+      - 0.5
+      duration_range:  # two-element list; presumably [min, max], unit not visible — confirm
+      - 0.1
+      - 0.5
+    random_noise:
+      enabled: true
+      noise_std: 0.001
+    pink_noise:
+      enabled: true
+      noise_std: 0.01
+    lowpass_filter:
+      enabled: true
+      cutoff_freq: 5000  # presumably Hz — confirm
+    highpass_filter:
+      enabled: true
+      cutoff_freq: 500  # presumably Hz — confirm
+    bandpass_filter:
+      enabled: true
+      cutoff_freq_low: 300  # presumably Hz — confirm
+      cutoff_freq_high: 8000  # presumably Hz — confirm
+    smooth:
+      enabled: true
+      window_size_range:  # two-element list; presumably [min, max] — confirm
+      - 2
+      - 10
+    boost_audio:
+      enabled: true
+      amount: 20  # NOTE(review): unit (dB?) is not visible here — confirm
+    duck_audio:
+      enabled: true
+      amount: 20  # NOTE(review): unit (dB?) is not visible here — confirm
+    updownresample:
+      enabled: true
+      intermediate_freq: 32000  # presumably Hz — confirm
+model:  # model selection and quantizer settings
+  name: hubert-base-ls960  # NOTE(review): presumably a pretrained HuBERT checkpoint id — confirm how it is resolved
+  layer: 6  # NOTE(review): presumably the layer whose features are used — confirm
+  vocab_size: 500
+  kind_kmeans: kmeans
+  quantizer:
+    hidden_dim: 256