Upload folder using huggingface_hub
Browse files- CKPT.yaml +11 -0
- README.md +66 -0
- attention_pooling.ckpt +3 -0
- brain.ckpt +3 -0
- dataloader-TRAIN.ckpt +3 -0
- dialect_encoder.txt +22 -0
- hyperparams.yaml +49 -0
- optimizer.ckpt +3 -0
- output_mlp.ckpt +3 -0
- whisper.ckpt +3 -0
- whisper_opt.ckpt +3 -0
CKPT.yaml
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# yamllint disable
|
| 2 |
+
end-of-epoch: true
|
| 3 |
+
error: 2.839878559112549
|
| 4 |
+
loss: 0.18992407526573798
|
| 5 |
+
macro_f1: 0.9538202964889487
|
| 6 |
+
macro_precision: 0.952679604174255
|
| 7 |
+
macro_recall: 0.9565894020982324
|
| 8 |
+
unixtime: 1737431086.8832679
|
| 9 |
+
weighted_f1: 0.9599932477445305
|
| 10 |
+
weighted_precision: 0.9608126922866167
|
| 11 |
+
weighted_recall: 0.9601927882898965
|
README.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- ar
|
| 4 |
+
pipeline_tag: audio-classification
|
| 5 |
+
library_name: speechbrain
|
| 6 |
+
tags:
|
| 7 |
+
- DialectID
|
| 8 |
+
- ADI
|
| 9 |
+
- ADI-20
|
| 10 |
+
- speechbrain
|
| 11 |
+
- Identification
|
| 12 |
+
- pytorch
|
| 13 |
+
- embeddings
|
| 14 |
+
datasets:
|
| 15 |
+
- ADI-20
|
| 16 |
+
metrics:
|
| 17 |
+
- f1
|
| 18 |
+
- precision
|
| 19 |
+
- recall
|
| 20 |
+
- accuracy
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## Install Requirements
|
| 24 |
+
|
| 25 |
+
### SpeechBrain
|
| 26 |
+
First of all, please install SpeechBrain with the following command:
|
| 27 |
+
|
| 28 |
+
```bash
|
| 29 |
+
pip install git+https://github.com/speechbrain/speechbrain.git@develop
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
### Clone ADI github repository
|
| 33 |
+
```bash
|
| 34 |
+
git clone https://github.com/elyadata/ADI-20
|
| 35 |
+
cd ADI-20
|
| 36 |
+
pip install -r requirements.txt
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
### Perform Arabic Dialect Identification
|
| 41 |
+
```python
|
| 42 |
+
from inference.classifier_attention_pooling import WhisperDialectClassifier
|
| 43 |
+
|
| 44 |
+
dialect_id = WhisperDialectClassifier.from_hparams(
|
| 45 |
+
source="",
|
| 46 |
+
hparams_file="hyperparams.yaml",
|
| 47 |
+
savedir="pretrained_DID/tmp").to("cuda")
|
| 48 |
+
|
| 49 |
+
dialect_id.device = "cuda"
|
| 50 |
+
|
| 51 |
+
dialect_id.classify_file("filename.wav")
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
### Citation
|
| 55 |
+
If using this work, please cite:
|
| 56 |
+
```
|
| 57 |
+
@inproceedings{elleuch2025adi20,
|
| 58 |
+
author = {Haroun Elleuch and Salima Mdhaffar and Yannick Estève and Fethi Bougares},
|
| 59 |
+
title = {ADI-20: Arabic Dialect Identification Dataset and Models},
|
| 60 |
+
booktitle = {Proceedings of the Annual Conference of the International Speech Communication Association (Interspeech)},
|
| 61 |
+
year = {2025},
|
| 62 |
+
address = {Rotterdam Ahoy Convention Centre, Rotterdam, The Netherlands},
|
| 63 |
+
month = {August},
|
| 64 |
+
days = {17-21}
|
| 65 |
+
}
|
| 66 |
+
```
|
attention_pooling.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e015a4ed868bc4dfcec47af51a95b622037fc13becb702cc8171a223dfddfe8
|
| 3 |
+
size 6740
|
brain.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3888629ac8efb67b3b056f3fe0d026702b046af2a15e965378332f7d63c5ca8f
|
| 3 |
+
size 50
|
dataloader-TRAIN.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a21369bcca05a0d5c2a7eb0ba00bd5dd34c28915c8c3da30553ee4043b3d5a6
|
| 3 |
+
size 5
|
dialect_encoder.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'ALG' => 0
|
| 2 |
+
'EGY' => 1
|
| 3 |
+
'IRA' => 2
|
| 4 |
+
'JOR' => 3
|
| 5 |
+
'KSA' => 4
|
| 6 |
+
'KUW' => 5
|
| 7 |
+
'LEB' => 6
|
| 8 |
+
'LIB' => 7
|
| 9 |
+
'MAU' => 8
|
| 10 |
+
'MOR' => 9
|
| 11 |
+
'OMA' => 10
|
| 12 |
+
'PAL' => 11
|
| 13 |
+
'QAT' => 12
|
| 14 |
+
'SUD' => 13
|
| 15 |
+
'SYR' => 14
|
| 16 |
+
'UAE' => 15
|
| 17 |
+
'YEM' => 16
|
| 18 |
+
'BAH' => 17
|
| 19 |
+
'MSA' => 18
|
| 20 |
+
'TUN' => 19
|
| 21 |
+
================
|
| 22 |
+
'starting_index' => 0
|
hyperparams.yaml
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ##########################################################################################
|
| 2 |
+
# Model: Whisper-large-v3 Encoder + Attention pooling for Arabic Dialect Identification
|
| 3 |
+
#
|
| 4 |
+
# Author: Haroun Elleuch
|
| 5 |
+
############################################################################################
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
pretrained_path: Elyadata/ADI-whisper-ADI20
|
| 9 |
+
whisper_hub: openai/whisper-large-v3
|
| 10 |
+
|
| 11 |
+
n_languages: 20
|
| 12 |
+
features_dim: 1280
|
| 13 |
+
|
| 14 |
+
whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
|
| 15 |
+
source: !ref <whisper_hub>
|
| 16 |
+
encoder_only: True
|
| 17 |
+
freeze_encoder: False
|
| 18 |
+
save_path: !ref <whisper_hub>
|
| 19 |
+
|
| 20 |
+
attention_pooling: !new:speechbrain.nnet.pooling.AttentionPooling
|
| 21 |
+
input_dim: !ref <features_dim>
|
| 22 |
+
|
| 23 |
+
output_mlp: !new:speechbrain.nnet.linear.Linear
|
| 24 |
+
input_size: !ref <features_dim>
|
| 25 |
+
n_neurons: !ref <n_languages>
|
| 26 |
+
bias: False
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
modules:
|
| 30 |
+
whisper: !ref <whisper>
|
| 31 |
+
attention_pooling: !ref <attention_pooling>
|
| 32 |
+
output_mlp: !ref <output_mlp>
|
| 33 |
+
|
| 34 |
+
log_softmax: !new:speechbrain.nnet.activations.Softmax
|
| 35 |
+
apply_log: True
|
| 36 |
+
|
| 37 |
+
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
|
| 38 |
+
|
| 39 |
+
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
|
| 40 |
+
loadables:
|
| 41 |
+
whisper: !ref <whisper>
|
| 42 |
+
attention_pooling: !ref <attention_pooling>
|
| 43 |
+
output_mlp: !ref <output_mlp>
|
| 44 |
+
label_encoder: !ref <label_encoder>
|
| 45 |
+
paths:
|
| 46 |
+
whisper: !ref <pretrained_path>/whisper.ckpt
|
| 47 |
+
attention_pooling: !ref <pretrained_path>/attention_pooling.ckpt
|
| 48 |
+
output_mlp: !ref <pretrained_path>/output_mlp.ckpt
|
| 49 |
+
label_encoder: !ref <pretrained_path>/dialect_encoder.txt
|
optimizer.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cdd58ef92828f25761d1f03453a16225327b46a9e13fb978c72e966a17cbf617
|
| 3 |
+
size 218582
|
output_mlp.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9d99ccddfc47f7160b7a630ef475327c769eaa4b0e1fa302c7e152e377dad5c
|
| 3 |
+
size 103723
|
whisper.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5721aa93158f312d0f694a573b72ed736dce9e33217c9f01d06e8d2cb149cc17
|
| 3 |
+
size 2548162402
|
whisper_opt.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68e246d6bf1425e5e864514f09a6c2dcd5f342939f5178923578edd00493445b
|
| 3 |
+
size 5080804356
|