Commit ·
2689dbf
0
Parent(s):
Duplicate from utter-project/mHuBERT-147
Browse filesCo-authored-by: Marcely Zanon Boito <mzboito@users.noreply.huggingface.co>
This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +76 -0
- README.md +217 -0
- checkpoint_best.pt +3 -0
- config.json +76 -0
- manifest/CV_1.tsv +3 -0
- manifest/CV_10.tsv +3 -0
- manifest/CV_11.tsv +3 -0
- manifest/CV_2.tsv +3 -0
- manifest/CV_3.tsv +3 -0
- manifest/CV_4.tsv +3 -0
- manifest/CV_5.tsv +3 -0
- manifest/CV_6.tsv +3 -0
- manifest/CV_7.tsv +3 -0
- manifest/CV_8.tsv +3 -0
- manifest/CV_9.tsv +3 -0
- manifest/MLS.tsv +3 -0
- manifest/MLS_1.tsv +3 -0
- manifest/MLS_10.tsv +3 -0
- manifest/MLS_11.tsv +3 -0
- manifest/MLS_12.tsv +3 -0
- manifest/MLS_13.tsv +3 -0
- manifest/MLS_14.tsv +3 -0
- manifest/MLS_15.tsv +3 -0
- manifest/MLS_16.tsv +3 -0
- manifest/MLS_17.tsv +3 -0
- manifest/MLS_18.tsv +3 -0
- manifest/MLS_19.tsv +3 -0
- manifest/MLS_2.tsv +3 -0
- manifest/MLS_20.tsv +3 -0
- manifest/MLS_3.tsv +3 -0
- manifest/MLS_4.tsv +3 -0
- manifest/MLS_5.tsv +3 -0
- manifest/MLS_6.tsv +3 -0
- manifest/MLS_7.tsv +3 -0
- manifest/MLS_8.tsv +3 -0
- manifest/MLS_9.tsv +3 -0
- manifest/VL_1.tsv +0 -0
- manifest/VL_10.tsv +0 -0
- manifest/VL_11.tsv +0 -0
- manifest/VL_12.tsv +3 -0
- manifest/VL_13.tsv +3 -0
- manifest/VL_14.tsv +0 -0
- manifest/VL_15.tsv +3 -0
- manifest/VL_16.tsv +3 -0
- manifest/VL_2.tsv +3 -0
- manifest/VL_3.tsv +0 -0
- manifest/VL_4.tsv +0 -0
- manifest/VL_5.tsv +0 -0
- manifest/VL_6.tsv +0 -0
- manifest/VL_7.tsv +0 -0
.gitattributes
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
mhubert147_faiss.index filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
manifest/CV_1.tsv filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
manifest/CV_10.tsv filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
manifest/CV_2.tsv filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
manifest/CV_3.tsv filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
manifest/CV_4.tsv filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
manifest/CV_5.tsv filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
manifest/CV_6.tsv filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
manifest/CV_7.tsv filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
manifest/CV_8.tsv filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
manifest/CV_9.tsv filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
manifest/CV_11.tsv filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
manifest/MLS_1.tsv filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
manifest/MLS_2.tsv filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
manifest/MLS_3.tsv filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
manifest/MLS_4.tsv filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
manifest/MLS_5.tsv filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
manifest/MLS.tsv filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
manifest/MLS_10.tsv filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
manifest/MLS_11.tsv filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
manifest/MLS_12.tsv filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
manifest/MLS_13.tsv filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
manifest/MLS_14.tsv filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
manifest/MLS_15.tsv filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
manifest/MLS_16.tsv filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
manifest/MLS_17.tsv filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
manifest/MLS_18.tsv filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
manifest/MLS_19.tsv filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
manifest/MLS_20.tsv filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
manifest/MLS_6.tsv filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
manifest/MLS_7.tsv filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
manifest/MLS_8.tsv filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
manifest/MLS_9.tsv filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
manifest/samromur.tsv filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
manifest/VL_12.tsv filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
manifest/VL_2.tsv filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
manifest/VL_8.tsv filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
manifest/VL_13.tsv filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
manifest/VL_15.tsv filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
manifest/VL_16.tsv filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
manifest/VP_2.tsv filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: cc-by-nc-sa-4.0
|
| 3 |
+
language:
|
| 4 |
+
- ab
|
| 5 |
+
- af
|
| 6 |
+
- am
|
| 7 |
+
- ar
|
| 8 |
+
- as
|
| 9 |
+
- az
|
| 10 |
+
- ba
|
| 11 |
+
- be
|
| 12 |
+
- bn
|
| 13 |
+
- bo
|
| 14 |
+
- bs
|
| 15 |
+
- br
|
| 16 |
+
- bg
|
| 17 |
+
- ca
|
| 18 |
+
- cs
|
| 19 |
+
- cv
|
| 20 |
+
- cy
|
| 21 |
+
- da
|
| 22 |
+
- de
|
| 23 |
+
- dv
|
| 24 |
+
- el
|
| 25 |
+
- en
|
| 26 |
+
- eo
|
| 27 |
+
- et
|
| 28 |
+
- eu
|
| 29 |
+
- ee
|
| 30 |
+
- fo
|
| 31 |
+
- fa
|
| 32 |
+
- tl
|
| 33 |
+
- fi
|
| 34 |
+
- fr
|
| 35 |
+
- fy
|
| 36 |
+
- ga
|
| 37 |
+
- gl
|
| 38 |
+
- gv
|
| 39 |
+
- gn
|
| 40 |
+
- gu
|
| 41 |
+
- ht
|
| 42 |
+
- ha
|
| 43 |
+
- he
|
| 44 |
+
- hi
|
| 45 |
+
- hr
|
| 46 |
+
- hu
|
| 47 |
+
- hy
|
| 48 |
+
- ig
|
| 49 |
+
- ia
|
| 50 |
+
- id
|
| 51 |
+
- is
|
| 52 |
+
- it
|
| 53 |
+
- jv
|
| 54 |
+
- ja
|
| 55 |
+
- kn
|
| 56 |
+
- ka
|
| 57 |
+
- kk
|
| 58 |
+
- km
|
| 59 |
+
- rw
|
| 60 |
+
- ky
|
| 61 |
+
- ku
|
| 62 |
+
- ko
|
| 63 |
+
- lo
|
| 64 |
+
- la
|
| 65 |
+
- lv
|
| 66 |
+
- ln
|
| 67 |
+
- lt
|
| 68 |
+
- lb
|
| 69 |
+
- lg
|
| 70 |
+
- ml
|
| 71 |
+
- mr
|
| 72 |
+
- mk
|
| 73 |
+
- mg
|
| 74 |
+
- mt
|
| 75 |
+
- mn
|
| 76 |
+
- mi
|
| 77 |
+
- ms
|
| 78 |
+
- my
|
| 79 |
+
- ne
|
| 80 |
+
- nl
|
| 81 |
+
- nn
|
| 82 |
+
- no
|
| 83 |
+
- oc
|
| 84 |
+
- or
|
| 85 |
+
- pa
|
| 86 |
+
- pl
|
| 87 |
+
- pt
|
| 88 |
+
- ps
|
| 89 |
+
- ro
|
| 90 |
+
- ru
|
| 91 |
+
- sa
|
| 92 |
+
- si
|
| 93 |
+
- sl
|
| 94 |
+
- sk
|
| 95 |
+
- sn
|
| 96 |
+
- sd
|
| 97 |
+
- so
|
| 98 |
+
- st
|
| 99 |
+
- es
|
| 100 |
+
- sq
|
| 101 |
+
- sc
|
| 102 |
+
- sr
|
| 103 |
+
- su
|
| 104 |
+
- sw
|
| 105 |
+
- sv
|
| 106 |
+
- ta
|
| 107 |
+
- tt
|
| 108 |
+
- te
|
| 109 |
+
- tg
|
| 110 |
+
- th
|
| 111 |
+
- tn
|
| 112 |
+
- tk
|
| 113 |
+
- tr
|
| 114 |
+
- tw
|
| 115 |
+
- ug
|
| 116 |
+
- uk
|
| 117 |
+
- ur
|
| 118 |
+
- uz
|
| 119 |
+
- vi
|
| 120 |
+
- xh
|
| 121 |
+
- yi
|
| 122 |
+
- yo
|
| 123 |
+
- zh
|
| 124 |
+
---
|
| 125 |
+
**This repository contains the best mHuBERT-147 pre-trained model.**
|
| 126 |
+
|
| 127 |
+
**MODEL DETAILS:** 3rd iteration, K=1000, HuBERT base architecture (95M parameters), 147 languages.
|
| 128 |
+
|
| 129 |
+
# mHuBERT-147 models
|
| 130 |
+
|
| 131 |
+
mHuBERT-147 are compact and competitive multilingual HuBERT models trained on 90K hours of open-license data in 147 languages.
|
| 132 |
+
Different from *traditional* HuBERTs, mHuBERT-147 models are trained using faiss IVF discrete speech units.
|
| 133 |
+
Training employs two-level up-sampling, over both languages and data sources. See more information in [our paper](https://arxiv.org/pdf/2406.06371).
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
# Table of Contents:
|
| 137 |
+
|
| 138 |
+
1. [Summary](https://huggingface.co/utter-project/mHuBERT-147#mhubert-147-models)
|
| 139 |
+
2. [Training Data and Code](https://huggingface.co/utter-project/mHuBERT-147#training)
|
| 140 |
+
3. [ML-SUPERB Scores](https://huggingface.co/utter-project/mHuBERT-147#ml-superb-scores)
|
| 141 |
+
4. [Languages and Datasets](https://huggingface.co/utter-project/mHuBERT-147#languages-and-datasets)
|
| 142 |
+
5. [Intermediate Checkpoints](https://huggingface.co/utter-project/mHuBERT-147#intermediate-checkpoints)
|
| 143 |
+
6. [Citing and Funding Information](https://huggingface.co/utter-project/mHuBERT-147#citing-and-funding-information)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
**This repository contains:**
|
| 147 |
+
* Fairseq checkpoint (original);
|
| 148 |
+
* HuggingFace checkpoint (conversion using transformers library);
|
| 149 |
+
* Faiss index for continuous pre-training (OPQ16_64,IVF1000_HNSW32,PQ16x4fsr).
|
| 150 |
+
|
| 151 |
+
**Related Models:**
|
| 152 |
+
* [2nd Iteration mHuBERT-147](https://huggingface.co/utter-project/mHuBERT-147-base-2nd-iter)
|
| 153 |
+
* [1st Iteration mHuBERT-147](https://huggingface.co/utter-project/mHuBERT-147-base-1st-iter)
|
| 154 |
+
* [CommonVoice Prototype (12 languages)](https://huggingface.co/utter-project/hutter-12-3rd-base)
|
| 155 |
+
|
| 156 |
+
# Training
|
| 157 |
+
|
| 158 |
+
* **[Manifest list available here.](https://huggingface.co/utter-project/mHuBERT-147-base-3rd-iter/tree/main/manifest)** Please note that since training, there have been CommonVoice removal requests. This means that some of the listed files are no longer available.
|
| 159 |
+
|
| 160 |
+
* **[Fairseq fork](https://github.com/utter-project/fairseq)** contains the scripts for training with multilingual batching with two-level up-sampling.
|
| 161 |
+
|
| 162 |
+
* **[Scripts for pre-processing/faiss clustering available here.](https://github.com/utter-project/mHuBERT-147-scripts)**
|
| 163 |
+
|
| 164 |
+
# ML-SUPERB Scores
|
| 165 |
+
|
| 166 |
+
mHuBERT-147 reaches second and first position in the 10min and 1h leaderboards, respectively. We achieve new SOTA scores for three LID tasks.
|
| 167 |
+
See more information in [our paper](https://arxiv.org/pdf/2406.06371).
|
| 168 |
+
|
| 169 |
+

|
| 170 |
+
|
| 171 |
+
# Languages and Datasets
|
| 172 |
+
|
| 173 |
+
**Datasets:** For ASR/ST/TTS datasets, only the train set is used.
|
| 174 |
+
* [Aishell](https://www.openslr.org/33/) and [AISHELL-3](https://www.openslr.org/93/)
|
| 175 |
+
* [BibleTTS](https://www.openslr.org/129/)
|
| 176 |
+
* [ClovaCall](https://github.com/clovaai/ClovaCall)
|
| 177 |
+
* [CommonVoice v11](https://commonvoice.mozilla.org/en/datasets)
|
| 178 |
+
* Google TTS data: [Javanese](https://www.openslr.org/41/), [Khmer](https://www.openslr.org/42/), [Nepali](https://www.openslr.org/43/), [Sundanese](https://www.openslr.org/44/), [South African Languages](https://www.openslr.org/32/), [Bengali Languages](https://www.openslr.org/37/)
|
| 179 |
+
* IISc-MILE: [Tamil](https://www.openslr.org/127/), [Kannada](https://www.openslr.org/126/)
|
| 180 |
+
* [Japanese Versatile Speech](https://sites.google.com/site/shinnosuketakamichi/research-topics/jvs_corpus)
|
| 181 |
+
* [Kokoro](https://github.com/kaiidams/Kokoro-Speech-Dataset)
|
| 182 |
+
* [Kosp2e](https://github.com/warnikchow/kosp2e)
|
| 183 |
+
* Media Speech: [Turkish Only](https://www.openslr.org/108/)
|
| 184 |
+
* [Multilingual LibriSpeech](https://www.openslr.org/94/)
|
| 185 |
+
* [Samrómur](https://www.openslr.org/128/)
|
| 186 |
+
* [THCHS-30](https://www.openslr.org/18/) and [THUYG-20](https://www.openslr.org/22/)
|
| 187 |
+
* [VoxLingua107](https://bark.phon.ioc.ee/voxlingua107/)
|
| 188 |
+
* [VoxPopuli](https://github.com/facebookresearch/voxpopuli/)
|
| 189 |
+
|
| 190 |
+
**Languages present not indexed by Huggingface:** Asturian (ast), Basaa (bas), Cebuano (ceb), Central Kurdish/Sorani (ckb), Hakha Chin (cnh), Hawaiian (haw), Upper Sorbian (hsb), Kabyle (kab), Moksha (mdf), Meadow Mari (mhr), Hill Mari (mrj), Erzya (myv), Taiwanese Hokkien (nan-tw), Sursilvan (rm-sursilv), Vallader (rm-vallader), Sakha (sah), Santali (sat), Scots (sco), Saraiki (skr), Tigre (tig), Tok Pisin (tpi), Akuapem Twi (tw-akuapem), Asante Twi (tw-asante), Votic (vot), Waray (war), Cantonese (yue).
|
| 191 |
+
|
| 192 |
+
# Intermediate Checkpoints
|
| 193 |
+
|
| 194 |
+
To enable research on training dynamics, the intermediate checkpoints for the three iterations are made available under the **CC-BY-NC-SA-4.0** license via a protected link.
|
| 195 |
+
|
| 196 |
+
* **Downloading page:** https://download.europe.naverlabs.com/mhubert147/
|
| 197 |
+
* **User:** user
|
| 198 |
+
* **Password:** license mentioned above in bold
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
# Citing and Funding Information
|
| 202 |
+
|
| 203 |
+
```
|
| 204 |
+
@inproceedings{boito2024mhubert,
|
| 205 |
+
author={Boito, Marcely Zanon and Iyer, Vivek and Lagos, Nikolaos and Besacier, Laurent and Calapodescu, Ioan},
|
| 206 |
+
title={{mHuBERT-147: A Compact Multilingual HuBERT Model}},
|
| 207 |
+
year=2024,
|
| 208 |
+
booktitle={Interspeech 2024}}
|
| 209 |
+
|
| 210 |
+
```
|
| 211 |
+
|
| 212 |
+
<img src="https://cdn-uploads.huggingface.co/production/uploads/62262e19d36494a6f743a28d/HbzC1C-uHe25ewTy2wyoK.png" width=7% height=7%>
|
| 213 |
+
This is an output of the European Project UTTER (Unified Transcription and Translation for Extended Reality) funded by the European Union’s Horizon Europe Research and Innovation programme under grant agreement number 101070631.
|
| 214 |
+
|
| 215 |
+
For more information please visit https://he-utter.eu/
|
| 216 |
+
|
| 217 |
+
NAVER LABS Europe: https://europe.naverlabs.com/
|
checkpoint_best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d88f79300fbd3dec7b0cc8ded2e3535cf09479d198a230babac835f9c274ef8
|
| 3 |
+
size 1138157677
|
config.json
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "mmHuBERT-3rd-faiss-0.7d-0.9l-1000k-850gb-2M-best",
|
| 3 |
+
"activation_dropout": 0.1,
|
| 4 |
+
"apply_spec_augment": true,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"HubertModel"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.1,
|
| 9 |
+
"bos_token_id": 1,
|
| 10 |
+
"classifier_proj_size": 256,
|
| 11 |
+
"conv_bias": false,
|
| 12 |
+
"conv_dim": [
|
| 13 |
+
512,
|
| 14 |
+
512,
|
| 15 |
+
512,
|
| 16 |
+
512,
|
| 17 |
+
512,
|
| 18 |
+
512,
|
| 19 |
+
512
|
| 20 |
+
],
|
| 21 |
+
"conv_kernel": [
|
| 22 |
+
10,
|
| 23 |
+
3,
|
| 24 |
+
3,
|
| 25 |
+
3,
|
| 26 |
+
3,
|
| 27 |
+
2,
|
| 28 |
+
2
|
| 29 |
+
],
|
| 30 |
+
"conv_stride": [
|
| 31 |
+
5,
|
| 32 |
+
2,
|
| 33 |
+
2,
|
| 34 |
+
2,
|
| 35 |
+
2,
|
| 36 |
+
2,
|
| 37 |
+
2
|
| 38 |
+
],
|
| 39 |
+
"ctc_loss_reduction": "sum",
|
| 40 |
+
"ctc_zero_infinity": false,
|
| 41 |
+
"do_stable_layer_norm": false,
|
| 42 |
+
"eos_token_id": 2,
|
| 43 |
+
"feat_extract_activation": "gelu",
|
| 44 |
+
"feat_extract_dropout": 0.0,
|
| 45 |
+
"feat_extract_norm": "group",
|
| 46 |
+
"feat_proj_dropout": 0.1,
|
| 47 |
+
"feat_proj_layer_norm": true,
|
| 48 |
+
"final_dropout": 0.1,
|
| 49 |
+
"gradient_checkpointing": false,
|
| 50 |
+
"hidden_act": "gelu",
|
| 51 |
+
"hidden_dropout": 0.1,
|
| 52 |
+
"hidden_dropout_prob": 0.1,
|
| 53 |
+
"hidden_size": 768,
|
| 54 |
+
"initializer_range": 0.02,
|
| 55 |
+
"intermediate_size": 3072,
|
| 56 |
+
"layer_norm_eps": 1e-05,
|
| 57 |
+
"layerdrop": 0.1,
|
| 58 |
+
"mask_feature_length": 10,
|
| 59 |
+
"mask_feature_min_masks": 0,
|
| 60 |
+
"mask_feature_prob": 0.0,
|
| 61 |
+
"mask_time_length": 10,
|
| 62 |
+
"mask_time_min_masks": 2,
|
| 63 |
+
"mask_time_prob": 0.05,
|
| 64 |
+
"model_type": "hubert",
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_conv_pos_embedding_groups": 16,
|
| 67 |
+
"num_conv_pos_embeddings": 128,
|
| 68 |
+
"num_feat_extract_layers": 7,
|
| 69 |
+
"num_hidden_layers": 12,
|
| 70 |
+
"pad_token_id": 0,
|
| 71 |
+
"tokenizer_class": "Wav2Vec2CTCTokenizer",
|
| 72 |
+
"torch_dtype": "float32",
|
| 73 |
+
"transformers_version": "4.30.1",
|
| 74 |
+
"use_weighted_layer_sum": false,
|
| 75 |
+
"vocab_size": 32
|
| 76 |
+
}
|
manifest/CV_1.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d448029528a38cc34b76351a24dba88f457982077cdc552eddac5a799449bbb
|
| 3 |
+
size 23524510
|
manifest/CV_10.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46f14f5e1aebc1f53945b6a0965bbd410131a659b6c4d4a963438abd1d0251a8
|
| 3 |
+
size 45518046
|
manifest/CV_11.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:925978f6d03e72252547d6555e755b864f2fbf0d325bb9c300c838193270c25e
|
| 3 |
+
size 70441456
|
manifest/CV_2.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46bc21105546d8712e569b18d2cdc0297678f9ac7e46f6cfcd208891f6de0060
|
| 3 |
+
size 27692393
|
manifest/CV_3.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5610b315716f064a528e21a1f94154a3aee81a85c54713caa144f8714841986a
|
| 3 |
+
size 24904693
|
manifest/CV_4.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:253c1125b1c3772dd85561d6401f2b1805fb2f55e36cfd7d0247dd7e0ebdf5c6
|
| 3 |
+
size 37878320
|
manifest/CV_5.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fec48b8afe104a0e48101a828ee32a1183b2181e23784e3b551743876f0ad45
|
| 3 |
+
size 13098753
|
manifest/CV_6.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b971f9a1154f38e9c80185250546a8977dd4628b3ddc0e978938ddb96cf24e01
|
| 3 |
+
size 56463129
|
manifest/CV_7.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4df7710f3d7bf1cb5e37e617694e7d4ea52cd1bcbd0290851132713d092c1f9
|
| 3 |
+
size 107675610
|
manifest/CV_8.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79ef8010bd5f2d887e709f0178f46b2bdfb3a35d31608b7bb16edc8b1915bbe9
|
| 3 |
+
size 94747946
|
manifest/CV_9.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:705b6b829ea759b9e4da9bd31cc8b4516cd383d5f8cb8d63e4024f201bf9df4f
|
| 3 |
+
size 44734175
|
manifest/MLS.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2253dd1a6ed0a50c2812e7d8a22d38fea1f30f9798441013feef14a404cfd42f
|
| 3 |
+
size 87469881
|
manifest/MLS_1.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d75502d848b4579cbd368f3097f19897738a43ffedbdb352c3d88d0fa761cac
|
| 3 |
+
size 26389281
|
manifest/MLS_10.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3084061b1bdc29ed0d7dfc4787404c6fd9114712fa577d97f35a41afab1ad2e
|
| 3 |
+
size 45084545
|
manifest/MLS_11.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:172230fc7f0c1e1242041ca5f5500cca4439730190ad6c3b629241808a96d781
|
| 3 |
+
size 25485271
|
manifest/MLS_12.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e9b467037a1620e80e37145fb3387407ba55a206cec2f9399fb784db6638106
|
| 3 |
+
size 25645643
|
manifest/MLS_13.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf41eb4642a142f7eaed417225ac409f7b361347503dedede48893ba43d9fe40
|
| 3 |
+
size 35549313
|
manifest/MLS_14.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a66d37c6f4b64b30fde25594a73e549dcfaac43341fe85d3c0947f7549135198
|
| 3 |
+
size 27803101
|
manifest/MLS_15.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21a05e582fda9a7a1bc07edb4ce4efc6169b8bf0c9c52220cc92a1623e655db7
|
| 3 |
+
size 31004025
|
manifest/MLS_16.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33fc13c9600c8ff097b05d3a4a401db1e4caa04af2f3545cf6bc316fe4731a5c
|
| 3 |
+
size 30926137
|
manifest/MLS_17.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d90000f27cc79304314f4004aa0479212c0c08ccb603c334191e4126f91aecb2
|
| 3 |
+
size 32022161
|
manifest/MLS_18.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57a398e8b3e99076923b79d0496bfe771049308b5ea76d3aece680d3fe9324f4
|
| 3 |
+
size 44275579
|
manifest/MLS_19.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:799152054e2d4ef46cdd7d350fc4f8412271508dd28b63d7d42688e1047a419a
|
| 3 |
+
size 21145621
|
manifest/MLS_2.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eaa2c5406ec4b454d60bec30e91496eb895efc16e69e14920eb9386bdd529bcc
|
| 3 |
+
size 23121823
|
manifest/MLS_20.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69fd196786681f883c9954c2dec9302e72035cb50b0c4df412c5253be740c95f
|
| 3 |
+
size 21617195
|
manifest/MLS_3.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a28b0a81d7be2d075f8e481979341f724b1d0b2729d7f36b342b3cb5915bf588
|
| 3 |
+
size 23913381
|
manifest/MLS_4.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97362b4263eb22c67bb3ba1e3a6dca6c1d731341f8f117173a342f73502a62c1
|
| 3 |
+
size 32341051
|
manifest/MLS_5.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef6cb8874c4ee25d67fa0527fab0aa3efc9035474a4f8850f860da3aecdbce9c
|
| 3 |
+
size 25007567
|
manifest/MLS_6.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3053087a4a1ecf91b00df521f75ced518e2a73666c2e01269485bebeb5eefdfc
|
| 3 |
+
size 20856015
|
manifest/MLS_7.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4130cb67e5b3175e204d1ccff29490712fd29d00da5192b2dfbfe3c73ce1e049
|
| 3 |
+
size 22289661
|
manifest/MLS_8.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:605b84699a31aadc3e8cb8c62c1434da9d5524d65ba1d18133928c773feec31d
|
| 3 |
+
size 42954841
|
manifest/MLS_9.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82d1e2ccfd56519c3308716f82ba4ed5816900b64dd702c5b4bc389528699f5e
|
| 3 |
+
size 37217047
|
manifest/VL_1.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
manifest/VL_10.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
manifest/VL_11.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
manifest/VL_12.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8245318dee919ef2381e1ad85c5ef13c0f724d19c1262af6532247d8c37aa47b
|
| 3 |
+
size 13173423
|
manifest/VL_13.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a01efebcdeaa4b49e63872de6564902c921ad6b8cd66bd23ebf5f7caee67679
|
| 3 |
+
size 10813510
|
manifest/VL_14.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
manifest/VL_15.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:254151b5c22f0d1fb7fedc204cbfe96ffb9724cb882d56b3a3e374d4e44282d6
|
| 3 |
+
size 14512091
|
manifest/VL_16.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5e2f2ee8e7314726ad44bbbe62061e543483657049ce97b07e644dd75538db4
|
| 3 |
+
size 14569041
|
manifest/VL_2.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd4ceec2a81b37861fa07ca2f0dd9ba3b3cecb454daa8440cc180c48438864cb
|
| 3 |
+
size 11399463
|
manifest/VL_3.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
manifest/VL_4.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
manifest/VL_5.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
manifest/VL_6.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
manifest/VL_7.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|