speechbrain
/

PIQ-ESC50

Sound Classification

Interpretable Sound Classification

Posthoc Interpretation

Posthoc Interpretation via Quantization

Model card Files Files and versions

cemsubakan committed on Feb 25, 2024

Commit

25df5de

·

verified ·

1 Parent(s): b665af3

Update hyperparams.yaml

Files changed (1) hide show

hyperparams.yaml +12 -7

hyperparams.yaml CHANGED Viewed

@@ -22,10 +22,10 @@ n_mels: 80
 out_n_neurons: 50
 # embedding_model: !new:custom_models.Conv2dEncoder_v2
-embedding_model: &id002 !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2
   dim: 256
-classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
   input_size: 256
   out_neurons: 50
   lin_blocks: 1
@@ -38,25 +38,30 @@ n_fft: 1024
 spec_mag_power: 0.5
 hop_length: 11.6099
 win_length: 23.2199
-compute_stft: &id005 !new:speechbrain.processing.features.STFT
   n_fft: 1024
   hop_length: 11.6099
   win_length: 23.2199
   sample_rate: 16000
-compute_fbank: &id006 !new:speechbrain.processing.features.Filterbank
   n_mels: 80
   n_fft: 1024
   sample_rate: 16000
-compute_istft: &id007 !new:speechbrain.processing.features.ISTFT
   sample_rate: 16000
   hop_length: 11.6099
   win_length: 23.2199
 label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
-psi_model: *id004
 modules:
   compute_stft: !ref <compute_stft>

 out_n_neurons: 50
 # embedding_model: !new:custom_models.Conv2dEncoder_v2
+embedding_model: !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2
   dim: 256
+classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
   input_size: 256
   out_neurons: 50
   lin_blocks: 1
 spec_mag_power: 0.5
 hop_length: 11.6099
 win_length: 23.2199
+compute_stft: !new:speechbrain.processing.features.STFT
   n_fft: 1024
   hop_length: 11.6099
   win_length: 23.2199
   sample_rate: 16000
+compute_fbank: !new:speechbrain.processing.features.Filterbank
   n_mels: 80
   n_fft: 1024
   sample_rate: 16000
+compute_istft: !new:speechbrain.processing.features.ISTFT
   sample_rate: 16000
   hop_length: 11.6099
   win_length: 23.2199
 label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+psi_model: !new:speechbrain.lobes.models.PIQ.VectorQuantizedPSI_Audio
+      dim: 256
+      K: 1024
+      shared_keys: 0
+      activate_class_partitioning: true
+      use_adapter: true
+      adapter_reduce_dim: true
 modules:
   compute_stft: !ref <compute_stft>