diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..efd788ecaecd0a171bdd78ff770e3417a76dc517 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +SER-Odyssey/Baseline_Model.pdf filter=lfs diff=lfs merge=lfs -text +SER-Odyssey/Odyssey[[:space:]]2024[[:space:]]-[[:space:]]Speech[[:space:]]Emotion[[:space:]]Recognition[[:space:]]Challenge.[[:space:]]Dataset,[[:space:]]Baseline,[[:space:]]Framework,[[:space:]]and[[:space:]]Results.pdf filter=lfs diff=lfs merge=lfs -text +SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/trt10_ser_fp16.plan filter=lfs diff=lfs merge=lfs -text +SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/trt8_ser_dyn_fp16.plan filter=lfs diff=lfs merge=lfs -text +wavlm-large-mnn/wavlm_large_fp16.mnn filter=lfs diff=lfs merge=lfs -text +wavlm-large-mnn/wavlm_large_int8.mnn filter=lfs diff=lfs merge=lfs -text +WavLM.[[:space:]]Large-Scale[[:space:]]Self-Supervised[[:space:]]Pre-Training[[:space:]]for[[:space:]]Full[[:space:]]Stack[[:space:]]Speech[[:space:]]Processing.pdf filter=lfs diff=lfs merge=lfs -text diff --git a/SER-Odyssey/Baseline_Model.pdf b/SER-Odyssey/Baseline_Model.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8d6abc4749d191676f79c8fa94158d93e32cab16 --- /dev/null +++ b/SER-Odyssey/Baseline_Model.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2006e79620902e9b411dd8e110f296c9e7d2458110faa8043d900187f203e103 +size 460836 diff --git a/SER-Odyssey/MSP-Podcast_Challenge [JMasr] +48 -24.zip b/SER-Odyssey/MSP-Podcast_Challenge [JMasr] +48 -24.zip new file mode 100644 index 0000000000000000000000000000000000000000..c195b8977bd34fc44ae72f47518bd369d2027f93 --- /dev/null +++ b/SER-Odyssey/MSP-Podcast_Challenge [JMasr] +48 -24.zip @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b53455da505412b271968d94febb011505ef41201826ba048dc7308306838a04 +size 895217 diff --git a/SER-Odyssey/MSP-Podcast_Challenge.zip b/SER-Odyssey/MSP-Podcast_Challenge.zip new file mode 100644 index 0000000000000000000000000000000000000000..7137f8f93fe2da792c750083c557f5adc215f949 --- /dev/null +++ b/SER-Odyssey/MSP-Podcast_Challenge.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c86e9700cc05734431656503b7602aa0a5f9b60be4a5a02238e87121324055a7 +size 897745 diff --git a/SER-Odyssey/Odyssey 2024 - Speech Emotion Recognition Challenge. Dataset, Baseline, Framework, and Results.pdf b/SER-Odyssey/Odyssey 2024 - Speech Emotion Recognition Challenge. Dataset, Baseline, Framework, and Results.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ad8b468dc101f2cd1e9c038236fe67afcc1d034e --- /dev/null +++ b/SER-Odyssey/Odyssey 2024 - Speech Emotion Recognition Challenge. Dataset, Baseline, Framework, and Results.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11f79ccab188b27218b3c5038fbec0ef21e0dca0d08af3d998e77b993d0ed31c +size 1083858 diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/.gitattributes b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text 
+*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/README.md b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/README.md new file mode 100644 index 0000000000000000000000000000000000000000..484beb790a4b7946c63f3f26fe10ca583cf9078a --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/README.md @@ -0,0 +1,83 @@ +--- +license: mit +language: +- en +pipeline_tag: audio-classification +tags: +- wavlm +- msp-podcast +- emotion-recognition +- audio +- speech +- arousal +- lucas +- speech-emotion-recognition +--- +The model was trained on [MSP-Podcast](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html) for the Odyssey 2024 Emotion Recognition competition baseline
+This particular model is the single-task specialized arousal model, which predict arousal in a range of approximately 0...1. + + + +# Benchmarks +CCC based on Test3 and Development sets of the Odyssey Competition + + + + + +
Sinle-Task Setup
Test 3Development
Aro Aro
0.566 0.651
+ + + +For more details: [demo](https://huggingface.co/spaces/3loi/WavLM-SER-Multi-Baseline-Odyssey2024), [paper](https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Goncalves_2024.pdf), and [GitHub](https://github.com/MSP-UTD/MSP-Podcast_Challenge/tree/main). + + +``` +@InProceedings{Goncalves_2024, + author={L. Goncalves and A. N. Salman and A. {Reddy Naini} and L. Moro-Velazquez and T. Thebaud and L. {Paola Garcia} and N. Dehak and B. Sisman and C. Busso}, + title={Odyssey2024 - Speech Emotion Recognition Challenge: Dataset, Baseline Framework, and Results}, + booktitle={Odyssey 2024: The Speaker and Language Recognition Workshop)}, + volume={To appear}, + year={2024}, + month={June}, + address = {Quebec, Canada}, +} +``` + + +# Usage +```python +from transformers import AutoModelForAudioClassification +import librosa, torch + +#load model +model = AutoModelForAudioClassification.from_pretrained("3loi/SER-Odyssey-Baseline-WavLM-Arousal", trust_remote_code=True) + +#get mean/std +mean = model.config.mean +std = model.config.std + + +#load an audio file +audio_path = "/path/to/audio.wav" +raw_wav, _ = librosa.load(audio_path, sr=model.config.sampling_rate) + +#normalize the audio by mean/std +norm_wav = (raw_wav - mean) / (std+0.000001) + +#generate the mask +mask = torch.ones(1, len(norm_wav)) + +#batch it (add dim) +wavs = torch.tensor(norm_wav).unsqueeze(0) + + +#predict +with torch.no_grad(): + pred = model(wavs, mask) + +print(model.config.id2label) +print(pred) +#{0: 'arousal'} +#tensor([[0.3670]]) +``` \ No newline at end of file diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/config.json b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/config.json new file mode 100644 index 0000000000000000000000000000000000000000..052a06b6d8f49d9e05d7aaeb9bb9fb9f2c64712b --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/config.json @@ -0,0 +1,26 @@ +{ + "architectures": [ + "SERModel" + ], + "auto_map": { + "AutoConfig": 
"pipeline_utils.SERConfig", + "AutoModelForAudioClassification": "pipeline_utils.SERModel" + }, + "id2label": { + "0": "arousal" + }, + "sampling_rate": 16000, + "maxlen": 192000, + "mean": -8.278621631819787e-05, + "std": 0.08485510250851999, + "classifier_dropout_prob": 0.5, + "classifier_hidden_layers": 1, + "hidden_size": 1024, + "model_type": "ser", + "num_attention_heads": 16, + "num_classes": 1, + "num_hidden_layers": 24, + "ssl_type": "microsoft/wavlm-large", + "torch_dtype": "float32", + "transformers_version": "4.34.0.dev0" +} diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/model.safetensors b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a18eb7cf7da83e3cdc9fcad7f27ed5eab75df7f7 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6513eca66ff2f599b248059ad44c41fef39d61b5cfc4995f777022c42c07106c +size 1274482316 diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/pipeline_utils.py b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/pipeline_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..55e484d4c1baa1068057fd6ba4de0642f5a43e67 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/pipeline_utils.py @@ -0,0 +1,165 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import AutoModel +from transformers.modeling_utils import PreTrainedModel ,PretrainedConfig + + +class Pooling(nn.Module): + def __init__(self): + super().__init__() + def compute_length_from_mask(self, mask): + """ + mask: (batch_size, T) + Assuming that the sampling rate is 16kHz, the frame shift is 20ms + """ + wav_lens = torch.sum(mask, dim=1) # (batch_size, ) + feat_lens = torch.div(wav_lens-1, 16000*0.02, rounding_mode="floor") + 1 + feat_lens = feat_lens.int().tolist() + return feat_lens + + def 
forward(self, x, mask): + raise NotImplementedError + +class MeanPooling(Pooling): + def __init__(self): + super().__init__() + def forward(self, xs, mask): + """ + xs: (batch_size, T, feat_dim) + mask: (batch_size, T) + + => output: (batch_size, feat_dim) + """ + feat_lens = self.compute_length_from_mask(mask) + pooled_list = [] + for x, feat_len in zip(xs, feat_lens): + pooled = torch.mean(x[:feat_len], dim=0) # (feat_dim, ) + pooled_list.append(pooled) + pooled = torch.stack(pooled_list, dim=0) # (batch_size, feat_dim) + return pooled + + +class AttentiveStatisticsPooling(Pooling): + """ + AttentiveStatisticsPooling + Paper: Attentive Statistics Pooling for Deep Speaker Embedding + Link: https://arxiv.org/pdf/1803.10963.pdf + """ + def __init__(self, input_size): + super().__init__() + self._indim = input_size + self.sap_linear = nn.Linear(input_size, input_size) + self.attention = nn.Parameter(torch.FloatTensor(input_size, 1)) + torch.nn.init.normal_(self.attention, mean=0, std=1) + + def forward(self, xs, mask): + """ + xs: (batch_size, T, feat_dim) + mask: (batch_size, T) + + => output: (batch_size, feat_dim*2) + """ + feat_lens = self.compute_length_from_mask(mask) + pooled_list = [] + for x, feat_len in zip(xs, feat_lens): + x = x[:feat_len].unsqueeze(0) + h = torch.tanh(self.sap_linear(x)) + w = torch.matmul(h, self.attention).squeeze(dim=2) + w = F.softmax(w, dim=1).view(x.size(0), x.size(1), 1) + mu = torch.sum(x * w, dim=1) + rh = torch.sqrt((torch.sum((x**2) * w, dim=1) - mu**2).clamp(min=1e-5)) + x = torch.cat((mu, rh), 1).squeeze(0) + pooled_list.append(x) + return torch.stack(pooled_list) + + + + +class EmotionRegression(nn.Module): + def __init__(self, *args, **kwargs): + super(EmotionRegression, self).__init__() + input_dim = args[0] + hidden_dim = args[1] + num_layers = args[2] + output_dim = args[3] + p = kwargs.get("dropout", 0.5) + + self.fc=nn.ModuleList([ + nn.Sequential( + nn.Linear(input_dim, hidden_dim), nn.LayerNorm(hidden_dim), 
nn.ReLU(), nn.Dropout(p) + ) + ]) + for lidx in range(num_layers-1): + self.fc.append( + nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(p) + ) + ) + self.out = nn.Sequential( + nn.Linear(hidden_dim, output_dim) + ) + + self.inp_drop = nn.Dropout(p) + def get_repr(self, x): + h = self.inp_drop(x) + for lidx, fc in enumerate(self.fc): + h=fc(h) + return h + + def forward(self, x): + h=self.get_repr(x) + result = self.out(h) + return result + +class SERConfig(PretrainedConfig): + model_type = "ser" + + def __init__( + self, + num_classes: int = 1, + num_attention_heads = 16, + num_hidden_layers = 24, + hidden_size = 1024, + classifier_hidden_layers = 1, + classifier_dropout_prob = 0.5, + ssl_type= "microsoft/wavlm-large", + torch_dtype= "float32", + **kwargs, + ): + self.num_classes = num_classes + self.num_attention_heads = num_attention_heads + self.num_hidden_layers = num_hidden_layers + self.hidden_size = hidden_size + self.classifier_hidden_layers = classifier_hidden_layers + self.classifier_dropout_prob = classifier_dropout_prob + self.ssl_type = ssl_type + self.torch_dtype = torch_dtype + super().__init__(**kwargs) + +class SERModel(PreTrainedModel): + config_class = SERConfig + + def __init__(self, config): + super().__init__(config) + self.ssl_model = AutoModel.from_pretrained(config.ssl_type) + self.ssl_model.freeze_feature_encoder() + + self.pool_model = AttentiveStatisticsPooling(config.hidden_size) + + self.ser_model = EmotionRegression(config.hidden_size*2, + config.hidden_size, + config.classifier_hidden_layers, + config.num_classes, + dropout=config.classifier_dropout_prob) + + + def forward(self, x, mask): + ssl = self.ssl_model(x, attention_mask=mask).last_hidden_state + + ssl = self.pool_model(ssl, mask) + + pred = self.ser_model(ssl) + + return pred + diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/pytorch_model.bin b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/pytorch_model.bin 
new file mode 100644 index 0000000000000000000000000000000000000000..1b5e9d6be4f32244601bf6fc6aa4e8679c23e84c --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Arousal/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f8a77ea0603b9ab91b3ce1d03c165db58d95ebae1c4210ab12dc94459c36b60 +size 1274585617 diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/.gitattributes b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text 
+*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/README.md b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ffc9e06115bbce3a02e3717a9dfd434eee49e4ed --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/README.md @@ -0,0 +1,87 @@ +--- +license: mit +language: +- en +pipeline_tag: audio-classification +tags: +- wavlm +- msp-podcast +- emotion-recognition +- audio +- speech +- categorical +- lucas +- speech-emotion-recognition +--- +The model was trained on [MSP-Podcast](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html) for the Odyssey 2024 Emotion Recognition competition baseline
+This particular model is the categorical based model which predicts: "Angry", "Sad", "Happy", "Surprise", "Fear", "Disgust", "Contempt" and "Neutral". + + +# Benchmarks +F1-scores based on Test3 and Development sets of the Odyssey Competition + + + + + +
Categorical Setup
Test 3Development
F1-Mic. F1-Ma. Prec. Rec. F1-Mic. F1-Ma. Prec. Rec.
0.327 0.311 0.332 0.325 0.409 0.307 0.316 0.345
+ + + +For more details: [demo](https://huggingface.co/spaces/3loi/WavLM-SER-Multi-Baseline-Odyssey2024), [paper](https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Goncalves_2024.pdf), and [GitHub](https://github.com/MSP-UTD/MSP-Podcast_Challenge/tree/main). + + +``` +@InProceedings{Goncalves_2024, + author={L. Goncalves and A. N. Salman and A. {Reddy Naini} and L. Moro-Velazquez and T. Thebaud and L. {Paola Garcia} and N. Dehak and B. Sisman and C. Busso}, + title={Odyssey2024 - Speech Emotion Recognition Challenge: Dataset, Baseline Framework, and Results}, + booktitle={Odyssey 2024: The Speaker and Language Recognition Workshop)}, + volume={To appear}, + year={2024}, + month={June}, + address = {Quebec, Canada}, +} +``` + + +# Usage +```python +from transformers import AutoModelForAudioClassification +import librosa, torch + +#load model +model = AutoModelForAudioClassification.from_pretrained("3loi/SER-Odyssey-Baseline-WavLM-Categorical-Attributes", trust_remote_code=True) + +#get mean/std +mean = model.config.mean +std = model.config.std + + +#load an audio file +audio_path = "/path/to/audio.wav" +raw_wav, _ = librosa.load(audio_path, sr=model.config.sampling_rate) + +#normalize the audio by mean/std +norm_wav = (raw_wav - mean) / (std+0.000001) + +#generate the mask +mask = torch.ones(1, len(norm_wav)) + +#batch it (add dim) +wavs = torch.tensor(norm_wav).unsqueeze(0) + + +#predict +with torch.no_grad(): + pred = model(wavs, mask) + +print(model.config.id2label) +print(pred) +#{0: 'Angry', 1: 'Sad', 2: 'Happy', 3: 'Surprise', 4: 'Fear', 5: 'Disgust', 6: 'Contempt', 7: 'Neutral'} +#tensor([[0.0015, 0.3651, 0.0593, 0.0315, 0.0600, 0.0125, 0.0319, 0.4382]]) + +#convert logits to probability +probabilities = torch.nn.functional.softmax(pred, dim=1) +print(probabilities) +#[[0.0015, 0.3651, 0.0593, 0.0315, 0.0600, 0.0125, 0.0319, 0.4382]] +``` \ No newline at end of file diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/config.json 
b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7b1aa047133ebc736623ae23a374e07c3c71d2ca --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/config.json @@ -0,0 +1,32 @@ +{ + "architectures": [ + "SERModel" + ], + "auto_map": { + "AutoConfig": "pipeline_utils.SERConfig", + "AutoModelForAudioClassification": "pipeline_utils.SERModel" + }, + "id2label": { + "0": "Angry", + "1": "Sad", + "2": "Happy", + "3": "Surprise", + "4": "Fear", + "5": "Disgust", + "6": "Contempt", + "7": "Neutral" + }, + "sampling_rate": 16000, + "classifier_dropout_prob": 0.5, + "classifier_hidden_layers": 1, + "hidden_size": 1024, + "mean": -8.278621631819787e-05, + "model_type": "ser", + "num_attention_heads": 16, + "num_classes": 8, + "num_hidden_layers": 24, + "ssl_type": "microsoft/wavlm-large", + "std": 0.08485510250851999, + "torch_dtype": "float32", + "transformers_version": "4.34.0.dev0" +} diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/model.safetensors b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf68f2b6117d241f8efbdbc3f76c485512d5e352 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb52f3f472b6a5a824ac238537fa60bf39a73d74b3fa5f4a4473c012cb3d18f4 +size 1274511016 diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/pipeline_utils.py b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/pipeline_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f666eaea76ff9b064435df523db9d387c807a71b --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/pipeline_utils.py @@ -0,0 +1,171 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import AutoModel +from 
transformers.modeling_utils import PreTrainedModel ,PretrainedConfig + + +class Pooling(nn.Module): + def __init__(self): + super().__init__() + def compute_length_from_mask(self, mask): + """ + mask: (batch_size, T) + Assuming that the sampling rate is 16kHz, the frame shift is 20ms + """ + wav_lens = torch.sum(mask, dim=1) # (batch_size, ) + feat_lens = torch.div(wav_lens-1, 16000*0.02, rounding_mode="floor") + 1 + feat_lens = feat_lens.int().tolist() + return feat_lens + + def forward(self, x, mask): + raise NotImplementedError + +class MeanPooling(Pooling): + def __init__(self): + super().__init__() + def forward(self, xs, mask): + """ + xs: (batch_size, T, feat_dim) + mask: (batch_size, T) + + => output: (batch_size, feat_dim) + """ + feat_lens = self.compute_length_from_mask(mask) + pooled_list = [] + for x, feat_len in zip(xs, feat_lens): + pooled = torch.mean(x[:feat_len], dim=0) # (feat_dim, ) + pooled_list.append(pooled) + pooled = torch.stack(pooled_list, dim=0) # (batch_size, feat_dim) + return pooled + + +class AttentiveStatisticsPooling(Pooling): + """ + AttentiveStatisticsPooling + Paper: Attentive Statistics Pooling for Deep Speaker Embedding + Link: https://arxiv.org/pdf/1803.10963.pdf + """ + def __init__(self, input_size): + super().__init__() + self._indim = input_size + self.sap_linear = nn.Linear(input_size, input_size) + self.attention = nn.Parameter(torch.FloatTensor(input_size, 1)) + torch.nn.init.normal_(self.attention, mean=0, std=1) + + def forward(self, xs, mask): + """ + xs: (batch_size, T, feat_dim) + mask: (batch_size, T) + + => output: (batch_size, feat_dim*2) + """ + feat_lens = self.compute_length_from_mask(mask) + pooled_list = [] + for x, feat_len in zip(xs, feat_lens): + x = x[:feat_len].unsqueeze(0) + h = torch.tanh(self.sap_linear(x)) + w = torch.matmul(h, self.attention).squeeze(dim=2) + w = F.softmax(w, dim=1).view(x.size(0), x.size(1), 1) + mu = torch.sum(x * w, dim=1) + rh = torch.sqrt((torch.sum((x**2) * w, dim=1) - 
mu**2).clamp(min=1e-5)) + x = torch.cat((mu, rh), 1).squeeze(0) + pooled_list.append(x) + return torch.stack(pooled_list) + + + + +class EmotionRegression(nn.Module): + def __init__(self, *args, **kwargs): + super(EmotionRegression, self).__init__() + input_dim = args[0] + hidden_dim = args[1] + num_layers = args[2] + output_dim = args[3] + p = kwargs.get("dropout", 0.5) + + self.fc=nn.ModuleList([ + nn.Sequential( + nn.Linear(input_dim, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(p) + ) + ]) + for lidx in range(num_layers-1): + self.fc.append( + nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(p) + ) + ) + self.out = nn.Sequential( + nn.Linear(hidden_dim, output_dim) + ) + + self.inp_drop = nn.Dropout(p) + def get_repr(self, x): + h = self.inp_drop(x) + for lidx, fc in enumerate(self.fc): + h=fc(h) + return h + + def forward(self, x): + h=self.get_repr(x) + result = self.out(h) + return result + + +class SERConfig(PretrainedConfig): + model_type = "ser" + + def __init__( + self, + num_classes: int = 8, + num_attention_heads = 16, + num_hidden_layers = 24, + hidden_size = 1024, + classifier_hidden_layers = 1, + classifier_dropout_prob = 0.5, + ssl_type= "microsoft/wavlm-large", + torch_dtype= "float32", + mean= -8.278621631819787e-05, + std=0.08485510250851999, + **kwargs, + ): + self.num_classes = num_classes + self.num_attention_heads = num_attention_heads + self.num_hidden_layers = num_hidden_layers + self.hidden_size = hidden_size + self.classifier_hidden_layers = classifier_hidden_layers + self.classifier_dropout_prob = classifier_dropout_prob + self.ssl_type = ssl_type + self.torch_dtype = torch_dtype + + self.mean = mean + self.std = std + super().__init__(**kwargs) + +class SERModel(PreTrainedModel): + config_class = SERConfig + + def __init__(self, config): + super().__init__(config) + self.ssl_model = AutoModel.from_pretrained(config.ssl_type) + self.ssl_model.freeze_feature_encoder() + + 
self.pool_model = AttentiveStatisticsPooling(config.hidden_size) + + self.ser_model = EmotionRegression(config.hidden_size*2, + config.hidden_size, + config.classifier_hidden_layers, + config.num_classes, + dropout=config.classifier_dropout_prob) + + + def forward(self, x, mask): + ssl = self.ssl_model(x, attention_mask=mask).last_hidden_state + + ssl = self.pool_model(ssl, mask) + + pred = self.ser_model(ssl) + + return pred + diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/pytorch_model.bin b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e34c79f990531bba617d04a5d766e5e73fc0d43 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Categorical/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:446f71c92a67b69977c50b065a0e418c37fa20aba1d2e44ecb1190d97f9c0cbb +size 1274614289 diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/.gitattributes b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text 
+*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/README.md b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/README.md new file mode 100644 index 0000000000000000000000000000000000000000..193975de9fac693e3a43272490b232ebac4ebfed --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/README.md @@ -0,0 +1,83 @@ +--- +license: mit +language: +- en +pipeline_tag: audio-classification +tags: +- wavlm +- msp-podcast +- emotion-recognition +- audio +- speech +- dominance +- lucas +- speech-emotion-recognition +--- +The model was trained on [MSP-Podcast](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html) for the Odyssey 2024 Emotion Recognition competition baseline
+This particular model is the single-task specialized dominance model, which predict dominance in a range of approximately 0...1. + + + +# Benchmarks +CCC based on Test3 and Development sets of the Odyssey Competition + + + + + +
Sinle-Task Setup
Test 3Development
Dom Dom
0.424 0.584
+ + + +For more details: [demo](https://huggingface.co/spaces/3loi/WavLM-SER-Multi-Baseline-Odyssey2024), [paper](https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Goncalves_2024.pdf) and [GitHub](https://github.com/MSP-UTD/MSP-Podcast_Challenge/tree/main). + + +``` +@InProceedings{Goncalves_2024, + author={L. Goncalves and A. N. Salman and A. {Reddy Naini} and L. Moro-Velazquez and T. Thebaud and L. {Paola Garcia} and N. Dehak and B. Sisman and C. Busso}, + title={Odyssey2024 - Speech Emotion Recognition Challenge: Dataset, Baseline Framework, and Results}, + booktitle={Odyssey 2024: The Speaker and Language Recognition Workshop)}, + volume={To appear}, + year={2024}, + month={June}, + address = {Quebec, Canada}, +} +``` + + +# Usage +```python +from transformers import AutoModelForAudioClassification +import librosa, torch + +#load model +model = AutoModelForAudioClassification.from_pretrained("3loi/SER-Odyssey-Baseline-WavLM-Dominance", trust_remote_code=True) + +#get mean/std +mean = model.config.mean +std = model.config.std + + +#load an audio file +audio_path = "/path/to/audio.wav" +raw_wav, _ = librosa.load(audio_path, sr=model.config.sampling_rate) + +#normalize the audio by mean/std +norm_wav = (raw_wav - mean) / (std+0.000001) + +#generate the mask +mask = torch.ones(1, len(norm_wav)) + +#batch it (add dim) +wavs = torch.tensor(norm_wav).unsqueeze(0) + + +#predict +with torch.no_grad(): + pred = model(wavs, mask) + +print(model.config.id2label) +print(pred) +#{0: 'dominance'} +#tensor([[0.3670]]) +``` \ No newline at end of file diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/config.json b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a109b574af0c4c2447f4c2078d9e219799df6881 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/config.json @@ -0,0 +1,26 @@ +{ + "architectures": [ + "SERModel" + ], + "auto_map": { + 
"AutoConfig": "pipeline_utils.SERConfig", + "AutoModelForAudioClassification": "pipeline_utils.SERModel" + }, + "id2label": { + "0": "dominance" + }, + "sampling_rate": 16000, + "maxlen": 192000, + "mean": -8.278621631819787e-05, + "std": 0.08485510250851999, + "classifier_dropout_prob": 0.5, + "classifier_hidden_layers": 1, + "hidden_size": 1024, + "model_type": "ser", + "num_attention_heads": 16, + "num_classes": 1, + "num_hidden_layers": 24, + "ssl_type": "microsoft/wavlm-large", + "torch_dtype": "float32", + "transformers_version": "4.34.0.dev0" +} diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/model.safetensors b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b9ca2c1914221f6277d7447709f1255f805bdd6 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5279e2387d029fb3c7529830546a876518bc32e264d61a21a593d708c9491e0 +size 1274482316 diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/pipeline_utils.py b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/pipeline_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..55e484d4c1baa1068057fd6ba4de0642f5a43e67 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/pipeline_utils.py @@ -0,0 +1,165 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import AutoModel +from transformers.modeling_utils import PreTrainedModel ,PretrainedConfig + + +class Pooling(nn.Module): + def __init__(self): + super().__init__() + def compute_length_from_mask(self, mask): + """ + mask: (batch_size, T) + Assuming that the sampling rate is 16kHz, the frame shift is 20ms + """ + wav_lens = torch.sum(mask, dim=1) # (batch_size, ) + feat_lens = torch.div(wav_lens-1, 16000*0.02, rounding_mode="floor") + 1 + feat_lens = feat_lens.int().tolist() + 
return feat_lens + + def forward(self, x, mask): + raise NotImplementedError + +class MeanPooling(Pooling): + def __init__(self): + super().__init__() + def forward(self, xs, mask): + """ + xs: (batch_size, T, feat_dim) + mask: (batch_size, T) + + => output: (batch_size, feat_dim) + """ + feat_lens = self.compute_length_from_mask(mask) + pooled_list = [] + for x, feat_len in zip(xs, feat_lens): + pooled = torch.mean(x[:feat_len], dim=0) # (feat_dim, ) + pooled_list.append(pooled) + pooled = torch.stack(pooled_list, dim=0) # (batch_size, feat_dim) + return pooled + + +class AttentiveStatisticsPooling(Pooling): + """ + AttentiveStatisticsPooling + Paper: Attentive Statistics Pooling for Deep Speaker Embedding + Link: https://arxiv.org/pdf/1803.10963.pdf + """ + def __init__(self, input_size): + super().__init__() + self._indim = input_size + self.sap_linear = nn.Linear(input_size, input_size) + self.attention = nn.Parameter(torch.FloatTensor(input_size, 1)) + torch.nn.init.normal_(self.attention, mean=0, std=1) + + def forward(self, xs, mask): + """ + xs: (batch_size, T, feat_dim) + mask: (batch_size, T) + + => output: (batch_size, feat_dim*2) + """ + feat_lens = self.compute_length_from_mask(mask) + pooled_list = [] + for x, feat_len in zip(xs, feat_lens): + x = x[:feat_len].unsqueeze(0) + h = torch.tanh(self.sap_linear(x)) + w = torch.matmul(h, self.attention).squeeze(dim=2) + w = F.softmax(w, dim=1).view(x.size(0), x.size(1), 1) + mu = torch.sum(x * w, dim=1) + rh = torch.sqrt((torch.sum((x**2) * w, dim=1) - mu**2).clamp(min=1e-5)) + x = torch.cat((mu, rh), 1).squeeze(0) + pooled_list.append(x) + return torch.stack(pooled_list) + + + + +class EmotionRegression(nn.Module): + def __init__(self, *args, **kwargs): + super(EmotionRegression, self).__init__() + input_dim = args[0] + hidden_dim = args[1] + num_layers = args[2] + output_dim = args[3] + p = kwargs.get("dropout", 0.5) + + self.fc=nn.ModuleList([ + nn.Sequential( + nn.Linear(input_dim, hidden_dim), 
nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(p) + ) + ]) + for lidx in range(num_layers-1): + self.fc.append( + nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(p) + ) + ) + self.out = nn.Sequential( + nn.Linear(hidden_dim, output_dim) + ) + + self.inp_drop = nn.Dropout(p) + def get_repr(self, x): + h = self.inp_drop(x) + for lidx, fc in enumerate(self.fc): + h=fc(h) + return h + + def forward(self, x): + h=self.get_repr(x) + result = self.out(h) + return result + +class SERConfig(PretrainedConfig): + model_type = "ser" + + def __init__( + self, + num_classes: int = 1, + num_attention_heads = 16, + num_hidden_layers = 24, + hidden_size = 1024, + classifier_hidden_layers = 1, + classifier_dropout_prob = 0.5, + ssl_type= "microsoft/wavlm-large", + torch_dtype= "float32", + **kwargs, + ): + self.num_classes = num_classes + self.num_attention_heads = num_attention_heads + self.num_hidden_layers = num_hidden_layers + self.hidden_size = hidden_size + self.classifier_hidden_layers = classifier_hidden_layers + self.classifier_dropout_prob = classifier_dropout_prob + self.ssl_type = ssl_type + self.torch_dtype = torch_dtype + super().__init__(**kwargs) + +class SERModel(PreTrainedModel): + config_class = SERConfig + + def __init__(self, config): + super().__init__(config) + self.ssl_model = AutoModel.from_pretrained(config.ssl_type) + self.ssl_model.freeze_feature_encoder() + + self.pool_model = AttentiveStatisticsPooling(config.hidden_size) + + self.ser_model = EmotionRegression(config.hidden_size*2, + config.hidden_size, + config.classifier_hidden_layers, + config.num_classes, + dropout=config.classifier_dropout_prob) + + + def forward(self, x, mask): + ssl = self.ssl_model(x, attention_mask=mask).last_hidden_state + + ssl = self.pool_model(ssl, mask) + + pred = self.ser_model(ssl) + + return pred + diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/pytorch_model.bin 
b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..04da037543ccf55ea4e004c536516ed17dc08e40 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Dominance/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6fc0167d183d89114be10df1c4e4f74040b558408efee99a71fcf5205865ef2 +size 1274585617 diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/.gitattributes b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text 
+*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/README.md b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f219cb7833ab853ef964c1d6508aac8264577d96 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/README.md @@ -0,0 +1,84 @@ +--- +license: mit +language: +- en +pipeline_tag: audio-classification +tags: +- wavlm +- msp-podcast +- emotion-recognition +- audio +- speech +- valence +- arousal +- dominance +- lucas +- speech-emotion-recognition +--- +The model was trained on [MSP-Podcast](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html) for the Odyssey 2024 Emotion Recognition competition baseline
+This particular model is the multi-attribute based model which predicts arousal, dominance and valence in a range of approximately 0...1. + + +# Benchmarks +CCC based on Test3 and Development sets of the Odyssey Competition + + + + + +
Multi-Task Setup
Test 3 Development
Val Dom Aro Val Dom Aro
0.577 0.577 0.405 0.652 0.688 0.579
+ + + +For more details: [demo](https://huggingface.co/spaces/3loi/WavLM-SER-Multi-Baseline-Odyssey2024), [paper](https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Goncalves_2024.pdf), and [GitHub](https://github.com/MSP-UTD/MSP-Podcast_Challenge/tree/main). + + +``` +@InProceedings{Goncalves_2024, + author={L. Goncalves and A. N. Salman and A. {Reddy Naini} and L. Moro-Velazquez and T. Thebaud and L. {Paola Garcia} and N. Dehak and B. Sisman and C. Busso}, + title={Odyssey2024 - Speech Emotion Recognition Challenge: Dataset, Baseline Framework, and Results}, + booktitle={Odyssey 2024: The Speaker and Language Recognition Workshop)}, + volume={To appear}, + year={2024}, + month={June}, + address = {Quebec, Canada}, +} +``` + + +# Usage +```python +from transformers import AutoModelForAudioClassification +import librosa, torch + +#load model +model = AutoModelForAudioClassification.from_pretrained("3loi/SER-Odyssey-Baseline-WavLM-Multi-Attributes", trust_remote_code=True) + +#get mean/std +mean = model.config.mean +std = model.config.std + + +#load an audio file +audio_path = "/path/to/audio.wav" +raw_wav, _ = librosa.load(audio_path, sr=model.config.sampling_rate) + +#normalize the audio by mean/std +norm_wav = (raw_wav - mean) / (std+0.000001) + +#generate the mask +mask = torch.ones(1, len(norm_wav)) + +#batch it (add dim) +wavs = torch.tensor(norm_wav).unsqueeze(0) + + +#predict +with torch.no_grad(): + pred = model(wavs, mask) + +print(model.config.id2label) +print(pred) +#{0: 'arousal', 1: 'dominance', 2: 'valence'} +#tensor([[0.3670, 0.4553, 0.4240]]) +``` \ No newline at end of file diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/config.json b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/config.json new file mode 100644 index 0000000000000000000000000000000000000000..93cf4890b9966e0b63732908679fb31b4ca31a95 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/config.json @@ -0,0 +1,28 
@@ +{ + "architectures": [ + "SERModel" + ], + "auto_map": { + "AutoConfig": "pipeline_utils.SERConfig", + "AutoModelForAudioClassification": "pipeline_utils.SERModel" + }, + "id2label": { + "0": "arousal", + "1": "dominance", + "2": "valence" + }, + "sampling_rate": 16000, + "maxlen": 192000, + "mean": -8.278621631819787e-05, + "std": 0.08485510250851999, + "classifier_dropout_prob": 0.5, + "classifier_hidden_layers": 1, + "hidden_size": 1024, + "model_type": "ser", + "num_attention_heads": 16, + "num_classes": 3, + "num_hidden_layers": 24, + "ssl_type": "microsoft/wavlm-large", + "torch_dtype": "float32", + "transformers_version": "4.34.0.dev0" +} diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/model.safetensors b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff822dd415c0f3158336d95f6f64e94a451f2b51 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:557ba9b4aa8461a60bc7f5c5bd2e34b4de34d4c8ccfa684c438b6cbdc1893c9d +size 1274490516 diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/pipeline_utils.py b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/pipeline_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..1137d9d79584ec5fcb0d0992711235f76f347430 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/pipeline_utils.py @@ -0,0 +1,167 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import AutoModel +from transformers.modeling_utils import PreTrainedModel ,PretrainedConfig + + + + +class Pooling(nn.Module): + def __init__(self): + super().__init__() + def compute_length_from_mask(self, mask): + """ + mask: (batch_size, T) + Assuming that the sampling rate is 16kHz, the frame shift is 20ms + """ + wav_lens = torch.sum(mask, 
dim=1) # (batch_size, ) + feat_lens = torch.div(wav_lens-1, 16000*0.02, rounding_mode="floor") + 1 + feat_lens = feat_lens.int().tolist() + return feat_lens + + def forward(self, x, mask): + raise NotImplementedError + +class MeanPooling(Pooling): + def __init__(self): + super().__init__() + def forward(self, xs, mask): + """ + xs: (batch_size, T, feat_dim) + mask: (batch_size, T) + + => output: (batch_size, feat_dim) + """ + feat_lens = self.compute_length_from_mask(mask) + pooled_list = [] + for x, feat_len in zip(xs, feat_lens): + pooled = torch.mean(x[:feat_len], dim=0) # (feat_dim, ) + pooled_list.append(pooled) + pooled = torch.stack(pooled_list, dim=0) # (batch_size, feat_dim) + return pooled + + +class AttentiveStatisticsPooling(Pooling): + """ + AttentiveStatisticsPooling + Paper: Attentive Statistics Pooling for Deep Speaker Embedding + Link: https://arxiv.org/pdf/1803.10963.pdf + """ + def __init__(self, input_size): + super().__init__() + self._indim = input_size + self.sap_linear = nn.Linear(input_size, input_size) + self.attention = nn.Parameter(torch.FloatTensor(input_size, 1)) + torch.nn.init.normal_(self.attention, mean=0, std=1) + + def forward(self, xs, mask): + """ + xs: (batch_size, T, feat_dim) + mask: (batch_size, T) + + => output: (batch_size, feat_dim*2) + """ + feat_lens = self.compute_length_from_mask(mask) + pooled_list = [] + for x, feat_len in zip(xs, feat_lens): + x = x[:feat_len].unsqueeze(0) + h = torch.tanh(self.sap_linear(x)) + w = torch.matmul(h, self.attention).squeeze(dim=2) + w = F.softmax(w, dim=1).view(x.size(0), x.size(1), 1) + mu = torch.sum(x * w, dim=1) + rh = torch.sqrt((torch.sum((x**2) * w, dim=1) - mu**2).clamp(min=1e-5)) + x = torch.cat((mu, rh), 1).squeeze(0) + pooled_list.append(x) + return torch.stack(pooled_list) + + + + +class EmotionRegression(nn.Module): + def __init__(self, *args, **kwargs): + super(EmotionRegression, self).__init__() + input_dim = args[0] + hidden_dim = args[1] + num_layers = args[2] + 
output_dim = args[3] + p = kwargs.get("dropout", 0.5) + + self.fc=nn.ModuleList([ + nn.Sequential( + nn.Linear(input_dim, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(p) + ) + ]) + for lidx in range(num_layers-1): + self.fc.append( + nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(p) + ) + ) + self.out = nn.Sequential( + nn.Linear(hidden_dim, output_dim) + ) + + self.inp_drop = nn.Dropout(p) + def get_repr(self, x): + h = self.inp_drop(x) + for lidx, fc in enumerate(self.fc): + h=fc(h) + return h + + def forward(self, x): + h=self.get_repr(x) + result = self.out(h) + return result + +class SERConfig(PretrainedConfig): + model_type = "ser" + + def __init__( + self, + num_classes: int = 3, + num_attention_heads = 16, + num_hidden_layers = 24, + hidden_size = 1024, + classifier_hidden_layers = 1, + classifier_dropout_prob = 0.5, + ssl_type= "microsoft/wavlm-large", + torch_dtype= "float32", + **kwargs, + ): + self.num_classes = num_classes + self.num_attention_heads = num_attention_heads + self.num_hidden_layers = num_hidden_layers + self.hidden_size = hidden_size + self.classifier_hidden_layers = classifier_hidden_layers + self.classifier_dropout_prob = classifier_dropout_prob + self.ssl_type = ssl_type + self.torch_dtype = torch_dtype + super().__init__(**kwargs) + +class SERModel(PreTrainedModel): + config_class = SERConfig + + def __init__(self, config): + super().__init__(config) + self.ssl_model = AutoModel.from_pretrained(config.ssl_type) + self.ssl_model.freeze_feature_encoder() + + self.pool_model = AttentiveStatisticsPooling(config.hidden_size) + + self.ser_model = EmotionRegression(config.hidden_size*2, + config.hidden_size, + config.classifier_hidden_layers, + config.num_classes, + dropout=config.classifier_dropout_prob) + + + def forward(self, x, mask): + ssl = self.ssl_model(x, attention_mask=mask).last_hidden_state + + ssl = self.pool_model(ssl, mask) + + pred = self.ser_model(ssl) + + 
return pred + diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/preprocessor_config.json b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f5db79210e7ae7aa906154a3c0c0f3546b1aa68e --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/preprocessor_config.json @@ -0,0 +1,3 @@ +{ + "mean": 10 +} \ No newline at end of file diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/pytorch_model.bin b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..53ee234552516a0e75e6bf48de5cfcfb0908aabb --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Multi-Attributes/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c34b4fd571efce7b4530a7539f1928213d535f6be19b2324bceca0c08c3e601 +size 1274593809 diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/.gitattributes b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text 
+*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/README.md b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e56e39a25e13f581d058788f4d61f25ec807a028 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/README.md @@ -0,0 +1,83 @@ +--- +license: mit +language: +- en +pipeline_tag: audio-classification +tags: +- wavlm +- msp-podcast +- emotion-recognition +- audio +- speech +- valence +- lucas +- speech-emotion-recognition +--- +The model was trained on [MSP-Podcast](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html) for the Odyssey 2024 Emotion Recognition competition baseline
+This particular model is the single-task specialized valence model, which predicts valence in a range of approximately 0...1. + + + +# Benchmarks +CCC based on Test3 and Development sets of the Odyssey Competition + + + + + +
Single-Task Setup
Test 3 Development
Val Val
0.607 0.709
+ + + +For more details: [demo](https://huggingface.co/spaces/3loi/WavLM-SER-Multi-Baseline-Odyssey2024), [paper](https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Goncalves_2024.pdf), and [GitHub](https://github.com/MSP-UTD/MSP-Podcast_Challenge/tree/main). + + +``` +@InProceedings{Goncalves_2024, + author={L. Goncalves and A. N. Salman and A. {Reddy Naini} and L. Moro-Velazquez and T. Thebaud and L. {Paola Garcia} and N. Dehak and B. Sisman and C. Busso}, + title={Odyssey2024 - Speech Emotion Recognition Challenge: Dataset, Baseline Framework, and Results}, + booktitle={Odyssey 2024: The Speaker and Language Recognition Workshop)}, + volume={To appear}, + year={2024}, + month={June}, + address = {Quebec, Canada}, +} +``` + + +# Usage +```python +from transformers import AutoModelForAudioClassification +import librosa, torch + +#load model +model = AutoModelForAudioClassification.from_pretrained("3loi/SER-Odyssey-Baseline-WavLM-Valence", trust_remote_code=True) + +#get mean/std +mean = model.config.mean +std = model.config.std + + +#load an audio file +audio_path = "/path/to/audio.wav" +raw_wav, _ = librosa.load(audio_path, sr=model.config.sampling_rate) + +#normalize the audio by mean/std +norm_wav = (raw_wav - mean) / (std+0.000001) + +#generate the mask +mask = torch.ones(1, len(norm_wav)) + +#batch it (add dim) +wavs = torch.tensor(norm_wav).unsqueeze(0) + + +#predict +with torch.no_grad(): + pred = model(wavs, mask) + +print(model.config.id2label) +print(pred) +#{0: 'valence'} +#tensor([[0.3670]]) +``` \ No newline at end of file diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/config.json b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/config.json new file mode 100644 index 0000000000000000000000000000000000000000..92bbb3d5f8c7310207732c9691535b130a7e21ba --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/config.json @@ -0,0 +1,26 @@ +{ + "architectures": [ + "SERModel" + ], + "auto_map": { + "AutoConfig": 
"pipeline_utils.SERConfig", + "AutoModelForAudioClassification": "pipeline_utils.SERModel" + }, + "id2label": { + "0": "valence" + }, + "sampling_rate": 16000, + "maxlen": 192000, + "mean": -8.278621631819787e-05, + "std": 0.08485510250851999, + "classifier_dropout_prob": 0.5, + "classifier_hidden_layers": 1, + "hidden_size": 1024, + "model_type": "ser", + "num_attention_heads": 16, + "num_classes": 1, + "num_hidden_layers": 24, + "ssl_type": "microsoft/wavlm-large", + "torch_dtype": "float32", + "transformers_version": "4.34.0.dev0" +} diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/model.safetensors b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a0fd73e4b5ee1441aa3c063eddc67d82256426b --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44449ad4b46e4af5168f29b25055ca67c28ffd44829d11020782c43712bbc8b3 +size 1274482316 diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/pipeline_utils.py b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/pipeline_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..55e484d4c1baa1068057fd6ba4de0642f5a43e67 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/pipeline_utils.py @@ -0,0 +1,165 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import AutoModel +from transformers.modeling_utils import PreTrainedModel ,PretrainedConfig + + +class Pooling(nn.Module): + def __init__(self): + super().__init__() + def compute_length_from_mask(self, mask): + """ + mask: (batch_size, T) + Assuming that the sampling rate is 16kHz, the frame shift is 20ms + """ + wav_lens = torch.sum(mask, dim=1) # (batch_size, ) + feat_lens = torch.div(wav_lens-1, 16000*0.02, rounding_mode="floor") + 1 + feat_lens = feat_lens.int().tolist() + return feat_lens + + def 
forward(self, x, mask): + raise NotImplementedError + +class MeanPooling(Pooling): + def __init__(self): + super().__init__() + def forward(self, xs, mask): + """ + xs: (batch_size, T, feat_dim) + mask: (batch_size, T) + + => output: (batch_size, feat_dim) + """ + feat_lens = self.compute_length_from_mask(mask) + pooled_list = [] + for x, feat_len in zip(xs, feat_lens): + pooled = torch.mean(x[:feat_len], dim=0) # (feat_dim, ) + pooled_list.append(pooled) + pooled = torch.stack(pooled_list, dim=0) # (batch_size, feat_dim) + return pooled + + +class AttentiveStatisticsPooling(Pooling): + """ + AttentiveStatisticsPooling + Paper: Attentive Statistics Pooling for Deep Speaker Embedding + Link: https://arxiv.org/pdf/1803.10963.pdf + """ + def __init__(self, input_size): + super().__init__() + self._indim = input_size + self.sap_linear = nn.Linear(input_size, input_size) + self.attention = nn.Parameter(torch.FloatTensor(input_size, 1)) + torch.nn.init.normal_(self.attention, mean=0, std=1) + + def forward(self, xs, mask): + """ + xs: (batch_size, T, feat_dim) + mask: (batch_size, T) + + => output: (batch_size, feat_dim*2) + """ + feat_lens = self.compute_length_from_mask(mask) + pooled_list = [] + for x, feat_len in zip(xs, feat_lens): + x = x[:feat_len].unsqueeze(0) + h = torch.tanh(self.sap_linear(x)) + w = torch.matmul(h, self.attention).squeeze(dim=2) + w = F.softmax(w, dim=1).view(x.size(0), x.size(1), 1) + mu = torch.sum(x * w, dim=1) + rh = torch.sqrt((torch.sum((x**2) * w, dim=1) - mu**2).clamp(min=1e-5)) + x = torch.cat((mu, rh), 1).squeeze(0) + pooled_list.append(x) + return torch.stack(pooled_list) + + + + +class EmotionRegression(nn.Module): + def __init__(self, *args, **kwargs): + super(EmotionRegression, self).__init__() + input_dim = args[0] + hidden_dim = args[1] + num_layers = args[2] + output_dim = args[3] + p = kwargs.get("dropout", 0.5) + + self.fc=nn.ModuleList([ + nn.Sequential( + nn.Linear(input_dim, hidden_dim), nn.LayerNorm(hidden_dim), 
nn.ReLU(), nn.Dropout(p) + ) + ]) + for lidx in range(num_layers-1): + self.fc.append( + nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(p) + ) + ) + self.out = nn.Sequential( + nn.Linear(hidden_dim, output_dim) + ) + + self.inp_drop = nn.Dropout(p) + def get_repr(self, x): + h = self.inp_drop(x) + for lidx, fc in enumerate(self.fc): + h=fc(h) + return h + + def forward(self, x): + h=self.get_repr(x) + result = self.out(h) + return result + +class SERConfig(PretrainedConfig): + model_type = "ser" + + def __init__( + self, + num_classes: int = 1, + num_attention_heads = 16, + num_hidden_layers = 24, + hidden_size = 1024, + classifier_hidden_layers = 1, + classifier_dropout_prob = 0.5, + ssl_type= "microsoft/wavlm-large", + torch_dtype= "float32", + **kwargs, + ): + self.num_classes = num_classes + self.num_attention_heads = num_attention_heads + self.num_hidden_layers = num_hidden_layers + self.hidden_size = hidden_size + self.classifier_hidden_layers = classifier_hidden_layers + self.classifier_dropout_prob = classifier_dropout_prob + self.ssl_type = ssl_type + self.torch_dtype = torch_dtype + super().__init__(**kwargs) + +class SERModel(PreTrainedModel): + config_class = SERConfig + + def __init__(self, config): + super().__init__(config) + self.ssl_model = AutoModel.from_pretrained(config.ssl_type) + self.ssl_model.freeze_feature_encoder() + + self.pool_model = AttentiveStatisticsPooling(config.hidden_size) + + self.ser_model = EmotionRegression(config.hidden_size*2, + config.hidden_size, + config.classifier_hidden_layers, + config.num_classes, + dropout=config.classifier_dropout_prob) + + + def forward(self, x, mask): + ssl = self.ssl_model(x, attention_mask=mask).last_hidden_state + + ssl = self.pool_model(ssl, mask) + + pred = self.ser_model(ssl) + + return pred + diff --git a/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/pytorch_model.bin b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/pytorch_model.bin 
new file mode 100644 index 0000000000000000000000000000000000000000..7280e3b4e51774b69a9f4e51f42435dbd2ee6784 --- /dev/null +++ b/SER-Odyssey/SER-Odyssey-Baseline-WavLM-Valence/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca8929ea564b56819ed846e96b1a472df11fa39d63f540108bab62c84b269b8 +size 1274585617 diff --git a/SER-Odyssey/SER-WavLM-Multi-Attributes/.gitattributes b/SER-Odyssey/SER-WavLM-Multi-Attributes/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..20575c60e70ac19e695c05137bdaf832fcd3c909 --- /dev/null +++ b/SER-Odyssey/SER-WavLM-Multi-Attributes/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs 
merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tensorrt/trt10_ser_fp16.plan filter=lfs diff=lfs merge=lfs -text +tensorrt/trt8_ser_dyn_fp16.plan filter=lfs diff=lfs merge=lfs -text diff --git a/SER-Odyssey/SER-WavLM-Multi-Attributes/README.md b/SER-Odyssey/SER-WavLM-Multi-Attributes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a45a2305aee341793e05b1ee900fc37d74481db4 --- /dev/null +++ b/SER-Odyssey/SER-WavLM-Multi-Attributes/README.md @@ -0,0 +1,228 @@ +--- +license: mit +language: +- en +pipeline_tag: audio-classification +tags: +- pytorch +- wavlm +- msp-podcast +- emotion-recognition +- audio +- speech +- valence +- arousal +- dominance +- lucas +- speech-emotion-recognition +--- +The model is a recreation of [3loi/SER-Odyssey-Baseline-WavLM-Multi-Attributes](https://huggingface.co/3loi/SER-Odyssey-Baseline-WavLM-Multi-Attributes) for direct implementation in torch, with class definition and feed forward method. This model was recreated with the hopes of greater flexibilty of control, training/fine-tuning of model. The model was trained on the same [MSP-Podcast](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html) dataset as the original, but a different smaller subset was used. The subset is evenly distributed across gender and emotion category with hopes that training would improve accuracy of valence and arousal predictions. +This model is therefore a multi-attributed based model which predict arousal, dominance and valence. However, unlike the original model, I just kept the original attribute score range of 0...7 (the range the dataset follows). I will provide the evaluations later on. 
For now I decided to make this repo so that other people could test out my model and see what they think of the inference accuracy themselves, or retrain from scratch, modify etc. My best trained weights s of now are provided in this repo. The class definition for the model is can be found in my [github](https://github.com/PhilipAmadasun/SER-Model-for-dimensional-attribute-prediction#). + +# Get class definition +``` +git clone https://github.com/PhilipAmadasun/SER-Model-for-dimensional-attribute-prediction.git +``` + +# Usage +## Inference Testing +```python +import torch +import torchaudio +from SER_Model_setup import SERModel + +device = "cuda" if torch.cuda.is_available() else "cpu" + +checkpoint_path = "" +checkpoint = torch.load(checkpoint_path, map_location=device) + +# Create the model architecture and load weights +model = SERModel() +model.load_state_dict(checkpoint['model_state_dict']) +model.to(device) +model.eval() + +audio_path = "" +audio, sr = torchaudio.load(audio_path) + +if sr != model.sample_rate: + resampler = torchaudio.transforms.Resample(sr, model.sample_rate) + audio = resampler(audio) +#print(audio.shape[0]) + +if audio.shape[0] > 1: + audio = torch.mean(audio, dim=0, keepdim=True) + +audio_len = audio.shape[-1] + +# Create waveform tensor (shape: [1, audio_len]) +waveform = torch.zeros(1, audio_len, dtype=torch.float32) +# print(waveform) +# print() +# print(f"waveform shape: {waveform.shape}") +# print() +waveform[0, :audio_len] = audio +# print(waveform) +# print() +# Create mask as 2D tensor: shape [1, audio_len] with ones in valid region +mask = torch.ones(1, audio_len, dtype=torch.float32) +# print(mask) +# print() +# print(f"mask shape: {mask.shape}") + +# Move waveform and mask to device +waveform = waveform.to(device) +mask = mask.to(device) + +# Normalize waveform using model's mean and std +mean = model.mean.to(device) +std = model.std.to(device) +waveform = (waveform - mean) / (std + 1e-6) + +with torch.no_grad(): + predictions 
= model(waveform, mask) # predictions shape: [1, 3] + +# Extract predictions: [0,0] for arousal, [0,1] for valence, [0,2] for dominance +arousal = predictions[0, 0].item() +valence = predictions[0, 1].item() +dominance = predictions[0, 2].item() + +print(f"Arousal: {arousal:.3f}") +print(f"Valence: {valence:.3f}") +print(f"Dominance: {dominance:.3f}") +``` +## Batch inference +```python +import os +import glob +import torch +import torchaudio +from SER_Model_setup import SERModel # Adjust if your model code is elsewhere + +def load_model_from_checkpoint(checkpoint_path, device='cpu'): + """ + Loads the SERModel and weights from a checkpoint, moves to device, sets eval mode. + """ + checkpoint = torch.load(checkpoint_path, map_location=device) + + # Create the model architecture + model = SERModel() + model.load_state_dict(checkpoint['model_state_dict']) + + model.to(device) + model.eval() + return model + +def batch_inference(model, file_paths, device='cpu', normalize=True): + """ + Perform true batch inference on multiple .wav files in one forward pass. 
+ + Args: + model (SERModel): The loaded SER model in eval mode + file_paths (list[str]): List of paths to .wav files + device (str or torch.device): 'cpu' or 'cuda' + normalize (bool): Whether to normalize waveforms (subtract mean, divide std) + + Returns: + dict: {filename: {"arousal": float, "valence": float, "dominance": float}} + """ + + # ---------------------------------------- + # 1) Load & store all waveforms in memory + # ---------------------------------------- + waveforms_list = [] + lengths = [] + for fp in file_paths: + # Load audio + audio, sr = torchaudio.load(fp) + + # Resample if needed + if sr != model.sample_rate: + resampler = torchaudio.transforms.Resample(sr, model.sample_rate) + audio = resampler(audio) + + # Convert stereo -> mono if needed + if audio.shape[0] > 1: + audio = torch.mean(audio, dim=0, keepdim=True) + + # audio shape => [1, num_samples] + lengths.append(audio.shape[-1]) + waveforms_list.append(audio) + + # ---------------------------------------- + # 2) Determine max length + # ---------------------------------------- + max_len = max(lengths) + + # ---------------------------------------- + # 3) Pad each waveform to max length & build masks + # ---------------------------------------- + batch_size = len(waveforms_list) + batched_waveforms = torch.zeros(batch_size, 1, max_len, dtype=torch.float32) + masks = torch.zeros(batch_size, max_len, dtype=torch.float32) + + for i, audio in enumerate(waveforms_list): + cur_len = audio.shape[-1] + batched_waveforms[i, :, :cur_len] = audio + masks[i, :cur_len] = 1.0 # valid portion + + # ---------------------------------------- + # 4) Move batched data to device BEFORE normalization + # ---------------------------------------- + batched_waveforms = batched_waveforms.to(device) + masks = masks.to(device) + + # ---------------------------------------- + # 5) Normalize if needed (model.mean, model.std) + # ---------------------------------------- + if normalize: + # model.mean and model.std 
are buffers; ensure they're on the correct device + mean = model.mean.to(device) + std = model.std.to(device) + batched_waveforms = (batched_waveforms - mean) / (std + 1e-6) + + # ---------------------------------------- + # 6) Single forward pass + # ---------------------------------------- + with torch.no_grad(): + predictions = model(batched_waveforms, masks) + # predictions shape => [batch_size, 3] + + # ---------------------------------------- + # 7) Build result dict + # ---------------------------------------- + results = {} + for i, fp in enumerate(file_paths): + arousal = predictions[i, 0].item() + valence = predictions[i, 1].item() + dominance = predictions[i, 2].item() + filename = os.path.basename(fp) + results[filename] = { + "arousal": arousal, + "valence": valence, + "dominance": dominance + } + + return results + +if __name__ == "__main__": + # ----------------------------------------- + # Example usage + # ----------------------------------------- + device = "cuda" if torch.cuda.is_available() else "cpu" + + checkpoint_path = "" + model = load_model_from_checkpoint(checkpoint_path, device=device) + + # Suppose you have a folder of .wav files + wav_folder = "" + wav_paths = glob.glob(os.path.join(wav_folder, "*.wav")) + + # Do a single pass of batch inference + all_results = batch_inference(model, wav_paths, device=device, normalize=True) + + # Print results + for fname, preds in all_results.items(): + print(f"{fname}: Arousal={preds['arousal']:.3f}, " + f"Valence={preds['valence']:.3f}, Dominance={preds['dominance']:.3f}") +``` \ No newline at end of file diff --git a/SER-Odyssey/SER-WavLM-Multi-Attributes/onnx/ReadMe b/SER-Odyssey/SER-WavLM-Multi-Attributes/onnx/ReadMe new file mode 100644 index 0000000000000000000000000000000000000000..3e05295886842a9f7dd32fe7d1d5b1f17769e452 --- /dev/null +++ b/SER-Odyssey/SER-WavLM-Multi-Attributes/onnx/ReadMe @@ -0,0 +1 @@ +model in onnx format \ No newline at end of file diff --git 
a/SER-Odyssey/SER-WavLM-Multi-Attributes/onnx/ser_dyn.onnx b/SER-Odyssey/SER-WavLM-Multi-Attributes/onnx/ser_dyn.onnx new file mode 100644 index 0000000000000000000000000000000000000000..534fd0478ae34ec3e3eb1060a052fd102d47bd86 --- /dev/null +++ b/SER-Odyssey/SER-WavLM-Multi-Attributes/onnx/ser_dyn.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad8465907c9dcfaa47628d7e8401a281396c17fe17b3c8b72071279cb6b2cac +size 1274295745 diff --git a/SER-Odyssey/SER-WavLM-Multi-Attributes/pytorch/best_weights.pt b/SER-Odyssey/SER-WavLM-Multi-Attributes/pytorch/best_weights.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd49b3bd641df39c7a1fe002d42c10057bbd2036 --- /dev/null +++ b/SER-Odyssey/SER-WavLM-Multi-Attributes/pytorch/best_weights.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:809f5b3ef98835b5ca9dcf9d0efb4bd6cf0a9cc458cfb9443ae07ef71b44f670 +size 1299851786 diff --git a/SER-Odyssey/SER-WavLM-Multi-Attributes/source.txt b/SER-Odyssey/SER-WavLM-Multi-Attributes/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..5bea27781cf14391610214c5cfa4a89362763f6b --- /dev/null +++ b/SER-Odyssey/SER-WavLM-Multi-Attributes/source.txt @@ -0,0 +1 @@ +https://huggingface.co/uyiosa/SER-WavLM-Multi-Attributes \ No newline at end of file diff --git a/SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/ReadMe b/SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/ReadMe new file mode 100644 index 0000000000000000000000000000000000000000..71fbf0424e76a187657fb3653efe71581a4a6b23 --- /dev/null +++ b/SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/ReadMe @@ -0,0 +1,2 @@ +trt10 -- compiled with TensorRT version 10 +trt8 -- comiled with TensorRT version 8 \ No newline at end of file diff --git a/SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/trt10_ser_fp16.plan b/SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/trt10_ser_fp16.plan new file mode 100644 index 
0000000000000000000000000000000000000000..1ac4fcd81c0404f5d09af776c59ada6bc29b8aed --- /dev/null +++ b/SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/trt10_ser_fp16.plan @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed792e1cd7a6e6f1d89413b5800da2b7328c40483d8532d8b0bc2e74444e0516 +size 644044452 diff --git a/SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/trt8_ser_dyn_fp16.plan b/SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/trt8_ser_dyn_fp16.plan new file mode 100644 index 0000000000000000000000000000000000000000..7f9303e53bbc00094868daf02781f499936cbddb --- /dev/null +++ b/SER-Odyssey/SER-WavLM-Multi-Attributes/tensorrt/trt8_ser_dyn_fp16.plan @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e157bb3cc07c0e808a93815a478874a092cd9f172abf83c1822ce9b9f1d55d +size 643712772 diff --git a/SER-Odyssey/source.txt b/SER-Odyssey/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..b3f0e6c4a8687457541a8d683cc8a17020436d18 --- /dev/null +++ b/SER-Odyssey/source.txt @@ -0,0 +1 @@ +https://huggingface.co/3loi/models \ No newline at end of file diff --git a/WavLM. Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing.pdf b/WavLM. Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e7c89dc38b76bb84a8e754a1d9397f935f62c2e5 --- /dev/null +++ b/WavLM. 
Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ca8836ebdf8236e610187738217d4c91c5ead13873472e476423f1561e9238e +size 929604 diff --git a/tiny-random-WavLMForAudioFrameClassification-ONNX/.gitattributes b/tiny-random-WavLMForAudioFrameClassification-ONNX/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/tiny-random-WavLMForAudioFrameClassification-ONNX/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs 
-text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/tiny-random-WavLMForAudioFrameClassification-ONNX/config.json b/tiny-random-WavLMForAudioFrameClassification-ONNX/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e9611018bdd2630d1bdcb9275d975ad14cfe0488 --- /dev/null +++ b/tiny-random-WavLMForAudioFrameClassification-ONNX/config.json @@ -0,0 +1,88 @@ +{ + "_attn_implementation_autoset": true, + "_name_or_path": "hf-internal-testing/tiny-random-WavLMForAudioFrameClassification", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMForAudioFrameClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 32, + 32, + 32 + ], + "conv_kernel": [ + 8, + 8, + 8 + ], + "conv_stride": [ + 4, + 4, + 4 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "final_dropout": 0.1, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 20, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_bucket_distance": 800, + "model_type": "wavlm", + "num_adapter_layers": 3, + "num_attention_heads": 2, + "num_buckets": 320, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 2, + 
"num_conv_pos_embeddings": 16, + "num_ctc_classes": 80, + "num_feat_extract_layers": 3, + "num_hidden_layers": 4, + "num_negatives": 100, + "output_hidden_size": 16, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 1 + ], + "tdnn_dim": [ + 32, + 32 + ], + "tdnn_kernel": [ + 3, + 3 + ], + "transformers_version": "4.48.2", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 32 +} diff --git a/tiny-random-WavLMForAudioFrameClassification-ONNX/onnx/model.onnx b/tiny-random-WavLMForAudioFrameClassification-ONNX/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..59d8b51bc61a6160f55fb16251355d777e974a3c --- /dev/null +++ b/tiny-random-WavLMForAudioFrameClassification-ONNX/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43d68f66c0eb42e09d03c533d705eabde0fd481635fdff874b9d94ae4445b550 +size 276448 diff --git a/tiny-random-WavLMForAudioFrameClassification-ONNX/preprocessor_config.json b/tiny-random-WavLMForAudioFrameClassification-ONNX/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10f6def8c83d70a2b087a567dcf523b75152a80b --- /dev/null +++ b/tiny-random-WavLMForAudioFrameClassification-ONNX/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/tiny-random-WavLMForAudioFrameClassification-ONNX/source.txt b/tiny-random-WavLMForAudioFrameClassification-ONNX/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..eb650a60df175778538afa36a9ae82ea86c3630d --- /dev/null +++ b/tiny-random-WavLMForAudioFrameClassification-ONNX/source.txt @@ -0,0 +1 @@ +https://huggingface.co/onnx-internal-testing/tiny-random-WavLMForAudioFrameClassification-ONNX \ No newline at end of file 
diff --git a/tiny-random-WavLMForAudioFrameClassification-ONNX/special_tokens_map.json b/tiny-random-WavLMForAudioFrameClassification-ONNX/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..fdafe480f024ff444c7492147536765ce5d55a2d --- /dev/null +++ b/tiny-random-WavLMForAudioFrameClassification-ONNX/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/tiny-random-WavLMForAudioFrameClassification-ONNX/tokenizer_config.json b/tiny-random-WavLMForAudioFrameClassification-ONNX/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..15b28f29732f29bf6afe8663b49b330f63ce2794 --- /dev/null +++ b/tiny-random-WavLMForAudioFrameClassification-ONNX/tokenizer_config.json @@ -0,0 +1,51 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "1": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "2": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "3": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "do_lower_case": false, + "do_normalize": true, + "eos_token": "", + "extra_special_tokens": {}, + "model_max_length": 9223372036854775807, + "pad_token": "", + "processor_class": "Wav2Vec2Processor", + "replace_word_delimiter_char": " ", + "return_attention_mask": false, + "target_lang": null, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "unk_token": "", + "word_delimiter_token": "|" +} diff --git a/tiny-random-WavLMForAudioFrameClassification-ONNX/vocab.json b/tiny-random-WavLMForAudioFrameClassification-ONNX/vocab.json 
new file mode 100644 index 0000000000000000000000000000000000000000..7efc55a6619a53189e1ca2b5bdcca400a2d4f3e6 --- /dev/null +++ b/tiny-random-WavLMForAudioFrameClassification-ONNX/vocab.json @@ -0,0 +1,34 @@ +{ + "'": 27, + "": 2, + "": 0, + "": 1, + "": 3, + "A": 7, + "B": 24, + "C": 19, + "D": 14, + "E": 5, + "F": 20, + "G": 21, + "H": 11, + "I": 10, + "J": 29, + "K": 26, + "L": 15, + "M": 17, + "N": 9, + "O": 8, + "P": 23, + "Q": 30, + "R": 13, + "S": 12, + "T": 6, + "U": 16, + "V": 25, + "W": 18, + "X": 28, + "Y": 22, + "Z": 31, + "|": 4 +} diff --git a/tiny-random-WavLMForCTC-ONNX/.gitattributes b/tiny-random-WavLMForCTC-ONNX/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/tiny-random-WavLMForCTC-ONNX/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs 
-text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/tiny-random-WavLMForCTC-ONNX/config.json b/tiny-random-WavLMForCTC-ONNX/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3ba575559378cd6732b599b1f59d80b1bfd0452c --- /dev/null +++ b/tiny-random-WavLMForCTC-ONNX/config.json @@ -0,0 +1,89 @@ +{ + "_attn_implementation_autoset": true, + "_name_or_path": "hf-internal-testing/tiny-random-WavLMForCTC", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 32, + 32, + 32 + ], + "conv_kernel": [ + 8, + 8, + 8 + ], + "conv_stride": [ + 4, + 4, + 4 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 20, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_bucket_distance": 800, + "model_type": "wavlm", + 
"num_adapter_layers": 3, + "num_attention_heads": 2, + "num_buckets": 320, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 2, + "num_conv_pos_embeddings": 16, + "num_ctc_classes": 80, + "num_feat_extract_layers": 3, + "num_hidden_layers": 4, + "num_negatives": 100, + "output_hidden_size": 16, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 1 + ], + "tdnn_dim": [ + 32, + 32 + ], + "tdnn_kernel": [ + 3, + 3 + ], + "transformers_version": "4.48.2", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 32 +} diff --git a/tiny-random-WavLMForCTC-ONNX/onnx/model.onnx b/tiny-random-WavLMForCTC-ONNX/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9e79bcdbf64196bfed00b38ec161284f07e8e906 --- /dev/null +++ b/tiny-random-WavLMForCTC-ONNX/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70fdfc28f5ab94b485dc531f52458254e55cef8ebe044c9ac4c3b7b768747bcc +size 278401 diff --git a/tiny-random-WavLMForCTC-ONNX/preprocessor_config.json b/tiny-random-WavLMForCTC-ONNX/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10f6def8c83d70a2b087a567dcf523b75152a80b --- /dev/null +++ b/tiny-random-WavLMForCTC-ONNX/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/tiny-random-WavLMForCTC-ONNX/source.txt b/tiny-random-WavLMForCTC-ONNX/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..d7ecdead41fe4c9d35fd7f1859f1b2be79c1334d --- /dev/null +++ b/tiny-random-WavLMForCTC-ONNX/source.txt @@ -0,0 +1 @@ +https://huggingface.co/onnx-internal-testing/tiny-random-WavLMForCTC-ONNX \ No newline at end of file diff --git 
a/tiny-random-WavLMForCTC-ONNX/special_tokens_map.json b/tiny-random-WavLMForCTC-ONNX/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..fdafe480f024ff444c7492147536765ce5d55a2d --- /dev/null +++ b/tiny-random-WavLMForCTC-ONNX/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/tiny-random-WavLMForCTC-ONNX/tokenizer_config.json b/tiny-random-WavLMForCTC-ONNX/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f27aad8f8d4422d8078147872a0b0071498cc5e3 --- /dev/null +++ b/tiny-random-WavLMForCTC-ONNX/tokenizer_config.json @@ -0,0 +1,51 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "1": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "2": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "3": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "do_normalize": true, + "eos_token": "", + "extra_special_tokens": {}, + "model_max_length": 9223372036854775807, + "pad_token": "", + "processor_class": "Wav2Vec2Processor", + "replace_word_delimiter_char": " ", + "return_attention_mask": false, + "target_lang": null, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "unk_token": "", + "word_delimiter_token": "|" +} diff --git a/tiny-random-WavLMForCTC-ONNX/vocab.json b/tiny-random-WavLMForCTC-ONNX/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..7efc55a6619a53189e1ca2b5bdcca400a2d4f3e6 --- /dev/null +++ b/tiny-random-WavLMForCTC-ONNX/vocab.json @@ -0,0 
+1,34 @@ +{ + "'": 27, + "": 2, + "": 0, + "": 1, + "": 3, + "A": 7, + "B": 24, + "C": 19, + "D": 14, + "E": 5, + "F": 20, + "G": 21, + "H": 11, + "I": 10, + "J": 29, + "K": 26, + "L": 15, + "M": 17, + "N": 9, + "O": 8, + "P": 23, + "Q": 30, + "R": 13, + "S": 12, + "T": 6, + "U": 16, + "V": 25, + "W": 18, + "X": 28, + "Y": 22, + "Z": 31, + "|": 4 +} diff --git a/tiny-random-WavLMForSequenceClassification-ONNX/.gitattributes b/tiny-random-WavLMForSequenceClassification-ONNX/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/tiny-random-WavLMForSequenceClassification-ONNX/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text 
+*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/tiny-random-WavLMForSequenceClassification-ONNX/config.json b/tiny-random-WavLMForSequenceClassification-ONNX/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a141cb4b841bd44eedcde7336db04de313bb50fb --- /dev/null +++ b/tiny-random-WavLMForSequenceClassification-ONNX/config.json @@ -0,0 +1,89 @@ +{ + "_attn_implementation_autoset": true, + "_name_or_path": "hf-internal-testing/tiny-random-WavLMForSequenceClassification", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 32, + 32, + 32 + ], + "conv_kernel": [ + 8, + 8, + 8 + ], + "conv_stride": [ + 4, + 4, + 4 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 20, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_bucket_distance": 800, + "model_type": "wavlm", + "num_adapter_layers": 3, + "num_attention_heads": 2, + 
"num_buckets": 320, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 2, + "num_conv_pos_embeddings": 16, + "num_ctc_classes": 80, + "num_feat_extract_layers": 3, + "num_hidden_layers": 4, + "num_negatives": 100, + "output_hidden_size": 16, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 1 + ], + "tdnn_dim": [ + 32, + 32 + ], + "tdnn_kernel": [ + 3, + 3 + ], + "transformers_version": "4.48.2", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 32 +} diff --git a/tiny-random-WavLMForSequenceClassification-ONNX/onnx/model.onnx b/tiny-random-WavLMForSequenceClassification-ONNX/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2741f1725191bd54ad4026dc6478e5e1d1a7b6c2 --- /dev/null +++ b/tiny-random-WavLMForSequenceClassification-ONNX/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323eef8b59de2019cebc3b0ad2eeb250a6a93b35ad5cafd6fa95a02129375e75 +size 296076 diff --git a/tiny-random-WavLMForSequenceClassification-ONNX/preprocessor_config.json b/tiny-random-WavLMForSequenceClassification-ONNX/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10f6def8c83d70a2b087a567dcf523b75152a80b --- /dev/null +++ b/tiny-random-WavLMForSequenceClassification-ONNX/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/tiny-random-WavLMForSequenceClassification-ONNX/source.txt b/tiny-random-WavLMForSequenceClassification-ONNX/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..e768c3c455da723c23d1e84c265b15e0495c41e3 --- /dev/null +++ b/tiny-random-WavLMForSequenceClassification-ONNX/source.txt @@ -0,0 +1 @@ 
+https://huggingface.co/onnx-internal-testing/tiny-random-WavLMForSequenceClassification-ONNX \ No newline at end of file diff --git a/tiny-random-WavLMForSequenceClassification-ONNX/special_tokens_map.json b/tiny-random-WavLMForSequenceClassification-ONNX/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..fdafe480f024ff444c7492147536765ce5d55a2d --- /dev/null +++ b/tiny-random-WavLMForSequenceClassification-ONNX/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/tiny-random-WavLMForSequenceClassification-ONNX/tokenizer_config.json b/tiny-random-WavLMForSequenceClassification-ONNX/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f27aad8f8d4422d8078147872a0b0071498cc5e3 --- /dev/null +++ b/tiny-random-WavLMForSequenceClassification-ONNX/tokenizer_config.json @@ -0,0 +1,51 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "1": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "2": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "3": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "do_normalize": true, + "eos_token": "", + "extra_special_tokens": {}, + "model_max_length": 9223372036854775807, + "pad_token": "", + "processor_class": "Wav2Vec2Processor", + "replace_word_delimiter_char": " ", + "return_attention_mask": false, + "target_lang": null, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "unk_token": "", + "word_delimiter_token": "|" +} diff --git 
a/tiny-random-WavLMForSequenceClassification-ONNX/vocab.json b/tiny-random-WavLMForSequenceClassification-ONNX/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..7efc55a6619a53189e1ca2b5bdcca400a2d4f3e6 --- /dev/null +++ b/tiny-random-WavLMForSequenceClassification-ONNX/vocab.json @@ -0,0 +1,34 @@ +{ + "'": 27, + "": 2, + "": 0, + "": 1, + "": 3, + "A": 7, + "B": 24, + "C": 19, + "D": 14, + "E": 5, + "F": 20, + "G": 21, + "H": 11, + "I": 10, + "J": 29, + "K": 26, + "L": 15, + "M": 17, + "N": 9, + "O": 8, + "P": 23, + "Q": 30, + "R": 13, + "S": 12, + "T": 6, + "U": 16, + "V": 25, + "W": 18, + "X": 28, + "Y": 22, + "Z": 31, + "|": 4 +} diff --git a/tiny-random-WavLMForXVector-ONNX/.gitattributes b/tiny-random-WavLMForXVector-ONNX/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/tiny-random-WavLMForXVector-ONNX/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar 
filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/tiny-random-WavLMForXVector-ONNX/config.json b/tiny-random-WavLMForXVector-ONNX/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5fb3e3fa7d5945255442f33d004808596f90ab08 --- /dev/null +++ b/tiny-random-WavLMForXVector-ONNX/config.json @@ -0,0 +1,89 @@ +{ + "_attn_implementation_autoset": true, + "_name_or_path": "hf-internal-testing/tiny-random-WavLMForXVector", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMForXVector" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 32, + 32, + 32 + ], + "conv_kernel": [ + 8, + 8, + 8 + ], + "conv_stride": [ + 4, + 4, + 4 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 20, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_prob": 
0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_bucket_distance": 800, + "model_type": "wavlm", + "num_adapter_layers": 3, + "num_attention_heads": 2, + "num_buckets": 320, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 2, + "num_conv_pos_embeddings": 16, + "num_ctc_classes": 80, + "num_feat_extract_layers": 3, + "num_hidden_layers": 4, + "num_negatives": 100, + "output_hidden_size": 16, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 1 + ], + "tdnn_dim": [ + 32, + 32 + ], + "tdnn_kernel": [ + 3, + 3 + ], + "transformers_version": "4.48.2", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 32 +} diff --git a/tiny-random-WavLMForXVector-ONNX/onnx/model.onnx b/tiny-random-WavLMForXVector-ONNX/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c1ebb7e5644c809d86e940e722744d6636270a9b --- /dev/null +++ b/tiny-random-WavLMForXVector-ONNX/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2839fd41f630f95b3430882388a7ce2412b68ff1fedb5057d7efd33bd56738 +size 318360 diff --git a/tiny-random-WavLMForXVector-ONNX/preprocessor_config.json b/tiny-random-WavLMForXVector-ONNX/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10f6def8c83d70a2b087a567dcf523b75152a80b --- /dev/null +++ b/tiny-random-WavLMForXVector-ONNX/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/tiny-random-WavLMForXVector-ONNX/source.txt b/tiny-random-WavLMForXVector-ONNX/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..f2702f018c6dac09fcfb60f2a839816d6a1c3a3a --- /dev/null +++ 
b/tiny-random-WavLMForXVector-ONNX/source.txt @@ -0,0 +1 @@ +https://huggingface.co/onnx-internal-testing/tiny-random-WavLMForXVector-ONNX \ No newline at end of file diff --git a/tiny-random-WavLMForXVector-ONNX/special_tokens_map.json b/tiny-random-WavLMForXVector-ONNX/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..fdafe480f024ff444c7492147536765ce5d55a2d --- /dev/null +++ b/tiny-random-WavLMForXVector-ONNX/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/tiny-random-WavLMForXVector-ONNX/tokenizer_config.json b/tiny-random-WavLMForXVector-ONNX/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f27aad8f8d4422d8078147872a0b0071498cc5e3 --- /dev/null +++ b/tiny-random-WavLMForXVector-ONNX/tokenizer_config.json @@ -0,0 +1,51 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "1": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "2": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "3": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "do_normalize": true, + "eos_token": "", + "extra_special_tokens": {}, + "model_max_length": 9223372036854775807, + "pad_token": "", + "processor_class": "Wav2Vec2Processor", + "replace_word_delimiter_char": " ", + "return_attention_mask": false, + "target_lang": null, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "unk_token": "", + "word_delimiter_token": "|" +} diff --git a/tiny-random-WavLMForXVector-ONNX/vocab.json 
b/tiny-random-WavLMForXVector-ONNX/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..7efc55a6619a53189e1ca2b5bdcca400a2d4f3e6 --- /dev/null +++ b/tiny-random-WavLMForXVector-ONNX/vocab.json @@ -0,0 +1,34 @@ +{ + "'": 27, + "": 2, + "": 0, + "": 1, + "": 3, + "A": 7, + "B": 24, + "C": 19, + "D": 14, + "E": 5, + "F": 20, + "G": 21, + "H": 11, + "I": 10, + "J": 29, + "K": 26, + "L": 15, + "M": 17, + "N": 9, + "O": 8, + "P": 23, + "Q": 30, + "R": 13, + "S": 12, + "T": 6, + "U": 16, + "V": 25, + "W": 18, + "X": 28, + "Y": 22, + "Z": 31, + "|": 4 +} diff --git a/tiny-random-WavLMModel-ONNX/.gitattributes b/tiny-random-WavLMModel-ONNX/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/tiny-random-WavLMModel-ONNX/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* 
filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/tiny-random-WavLMModel-ONNX/config.json b/tiny-random-WavLMModel-ONNX/config.json new file mode 100644 index 0000000000000000000000000000000000000000..09855869f1cd165966833c731a9e9cb0b55a54d1 --- /dev/null +++ b/tiny-random-WavLMModel-ONNX/config.json @@ -0,0 +1,89 @@ +{ + "_attn_implementation_autoset": true, + "_name_or_path": "hf-internal-testing/tiny-random-WavLMModel", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMModel" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 32, + 32, + 32 + ], + "conv_kernel": [ + 8, + 8, + 8 + ], + "conv_stride": [ + 4, + 4, + 4 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 20, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_bucket_distance": 800, + "model_type": 
"wavlm", + "num_adapter_layers": 3, + "num_attention_heads": 2, + "num_buckets": 320, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 2, + "num_conv_pos_embeddings": 16, + "num_ctc_classes": 80, + "num_feat_extract_layers": 3, + "num_hidden_layers": 4, + "num_negatives": 100, + "output_hidden_size": 16, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 1 + ], + "tdnn_dim": [ + 32, + 32 + ], + "tdnn_kernel": [ + 3, + 3 + ], + "transformers_version": "4.48.2", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 32 +} diff --git a/tiny-random-WavLMModel-ONNX/onnx/model.onnx b/tiny-random-WavLMModel-ONNX/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b5537f53418feceea0f8eb8b91c9a655ab5a7447 --- /dev/null +++ b/tiny-random-WavLMModel-ONNX/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be31e057f4d9fc52f1affa472d6856912de7ebf3e480d563e71319cd5af43c04 +size 259029 diff --git a/tiny-random-WavLMModel-ONNX/preprocessor_config.json b/tiny-random-WavLMModel-ONNX/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10f6def8c83d70a2b087a567dcf523b75152a80b --- /dev/null +++ b/tiny-random-WavLMModel-ONNX/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/tiny-random-WavLMModel-ONNX/source.txt b/tiny-random-WavLMModel-ONNX/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..684c32be3a6bfef778326b9803359dd6e69f38bf --- /dev/null +++ b/tiny-random-WavLMModel-ONNX/source.txt @@ -0,0 +1 @@ +https://huggingface.co/onnx-internal-testing/tiny-random-WavLMModel-ONNX \ No newline at end of file diff --git 
a/tiny-random-WavLMModel-ONNX/special_tokens_map.json b/tiny-random-WavLMModel-ONNX/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..fdafe480f024ff444c7492147536765ce5d55a2d --- /dev/null +++ b/tiny-random-WavLMModel-ONNX/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/tiny-random-WavLMModel-ONNX/tokenizer_config.json b/tiny-random-WavLMModel-ONNX/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f27aad8f8d4422d8078147872a0b0071498cc5e3 --- /dev/null +++ b/tiny-random-WavLMModel-ONNX/tokenizer_config.json @@ -0,0 +1,51 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "1": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "2": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "3": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "do_normalize": true, + "eos_token": "", + "extra_special_tokens": {}, + "model_max_length": 9223372036854775807, + "pad_token": "", + "processor_class": "Wav2Vec2Processor", + "replace_word_delimiter_char": " ", + "return_attention_mask": false, + "target_lang": null, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "unk_token": "", + "word_delimiter_token": "|" +} diff --git a/tiny-random-WavLMModel-ONNX/vocab.json b/tiny-random-WavLMModel-ONNX/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..7efc55a6619a53189e1ca2b5bdcca400a2d4f3e6 --- /dev/null +++ b/tiny-random-WavLMModel-ONNX/vocab.json @@ -0,0 +1,34 @@ 
+{ + "'": 27, + "": 2, + "": 0, + "": 1, + "": 3, + "A": 7, + "B": 24, + "C": 19, + "D": 14, + "E": 5, + "F": 20, + "G": 21, + "H": 11, + "I": 10, + "J": 29, + "K": 26, + "L": 15, + "M": 17, + "N": 9, + "O": 8, + "P": 23, + "Q": 30, + "R": 13, + "S": 12, + "T": 6, + "U": 16, + "V": 25, + "W": 18, + "X": 28, + "Y": 22, + "Z": 31, + "|": 4 +} diff --git a/wavlm-base-plus-sd/.gitattributes b/wavlm-base-plus-sd/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/wavlm-base-plus-sd/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz 
filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/wavlm-base-plus-sd/README.md b/wavlm-base-plus-sd/README.md new file mode 100644 index 0000000000000000000000000000000000000000..26f934720b330e1403977bfedf22f84139aeea90 --- /dev/null +++ b/wavlm-base-plus-sd/README.md @@ -0,0 +1,8 @@ +--- +base_model: microsoft/wavlm-base-plus-sd +library_name: transformers.js +--- + +https://huggingface.co/microsoft/wavlm-base-plus-sd with ONNX weights to be compatible with Transformers.js. + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). \ No newline at end of file diff --git a/wavlm-base-plus-sd/config.json b/wavlm-base-plus-sd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..31824b703e42b8618cb82356e4ea062c346137d9 --- /dev/null +++ b/wavlm-base-plus-sd/config.json @@ -0,0 +1,121 @@ +{ + "_name_or_path": "microsoft/wavlm-base-plus-sd", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMForAudioFrameClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + 
"eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.05, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "max_bucket_distance": 800, + "model_type": "wavlm", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_buckets": 320, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_ctc_classes": 80, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "replace_prob": 0.5, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "transformers_version": "4.33.2", + "use_weighted_layer_sum": true, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/wavlm-base-plus-sd/onnx/model.onnx b/wavlm-base-plus-sd/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1683427ba8c8eb1be87fc2c9b7b5b5427b60f6cd --- /dev/null +++ b/wavlm-base-plus-sd/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6a2c86146a20e49fdc5ba23acb6832636b2d9e8b4f79dd20eae148508dd7ad72 +size 377989447 diff --git a/wavlm-base-plus-sd/onnx/model_quantized.onnx b/wavlm-base-plus-sd/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..be722c3cfcc0bdfcfcf6af9ea38bc7ee262933f4 --- /dev/null +++ b/wavlm-base-plus-sd/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28ddf26911202979b5092c2b7e821ce65dc45dff9aa4c16ed1e2d156281528da +size 95495292 diff --git a/wavlm-base-plus-sd/preprocessor_config.json b/wavlm-base-plus-sd/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10f6def8c83d70a2b087a567dcf523b75152a80b --- /dev/null +++ b/wavlm-base-plus-sd/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wavlm-base-plus-sd/quantize_config.json b/wavlm-base-plus-sd/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d2bc6c4d4cf804f9dd17476c3900bbe18ccbbb2b --- /dev/null +++ b/wavlm-base-plus-sd/quantize_config.json @@ -0,0 +1,45 @@ +{ + "per_channel": false, + "reduce_range": false, + "per_model_config": { + "model": { + "op_types": [ + "Softmax", + "Where", + "ReduceMean", + "Conv", + "Mul", + "Sub", + "Concat", + "Unsqueeze", + "Div", + "Transpose", + "InstanceNormalization", + "Cast", + "Less", + "Tile", + "Log", + "Range", + "ReduceSum", + "Expand", + "Reshape", + "Min", + "Sigmoid", + "Slice", + "Add", + "Pow", + "Erf", + "Greater", + "Abs", + "Gather", + "Shape", + "Sqrt", + "Constant", + "MatMul", + "ConstantOfShape", + "Gemm" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/wavlm-base-plus-sd/source.txt b/wavlm-base-plus-sd/source.txt new file mode 100644 index 
0000000000000000000000000000000000000000..49d38fd5a7398f730b5f17f4020bcfbf8d0c6de6 --- /dev/null +++ b/wavlm-base-plus-sd/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/wavlm-base-plus-sd \ No newline at end of file diff --git a/wavlm-base-plus-sv/.gitattributes b/wavlm-base-plus-sv/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/wavlm-base-plus-sv/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text 
+*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/wavlm-base-plus-sv/README.md b/wavlm-base-plus-sv/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab9d3afcd78184d0855961217cce3e0ac863442 --- /dev/null +++ b/wavlm-base-plus-sv/README.md @@ -0,0 +1,47 @@ +--- +base_model: microsoft/wavlm-base-plus-sv +library_name: transformers.js +--- + +https://huggingface.co/microsoft/wavlm-base-plus-sv with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using: +```bash +npm i @xenova/transformers +``` + +**Example:** Speaker verification w/ `Xenova/wavlm-base-plus-sv`. + +```js +import { AutoProcessor, AutoModel, read_audio, cos_sim } from '@xenova/transformers'; + +// Load processor and model +const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base-plus-sv'); +const model = await AutoModel.from_pretrained('Xenova/wavlm-base-plus-sv'); + +// Helper function to compute speaker embedding from audio URL +async function compute_embedding(url) { + const audio = await read_audio(url, 16000); + const inputs = await processor(audio); + const { embeddings } = await model(inputs); + return embeddings.data; +} + +// Generate speaker embeddings +const BASE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/sv_speaker'; +const speaker_1_1 = await compute_embedding(`${BASE_URL}-1_1.wav`); +const speaker_1_2 = await compute_embedding(`${BASE_URL}-1_2.wav`); +const speaker_2_1 = await compute_embedding(`${BASE_URL}-2_1.wav`); +const speaker_2_2 = await compute_embedding(`${BASE_URL}-2_2.wav`); + +// Compute similarity scores +console.log(cos_sim(speaker_1_1, speaker_1_2)); // 0.959439158881247 (Both are speaker 1) +console.log(cos_sim(speaker_1_2, speaker_2_1)); // 0.618130172602329 
(Different speakers) +console.log(cos_sim(speaker_2_1, speaker_2_2)); // 0.962999814169370 (Both are speaker 2) +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). \ No newline at end of file diff --git a/wavlm-base-plus-sv/config.json b/wavlm-base-plus-sv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9c6e4240654c7fdbc805678f3df7782d80ac6813 --- /dev/null +++ b/wavlm-base-plus-sv/config.json @@ -0,0 +1,2547 @@ +{ + "_name_or_path": "microsoft/wavlm-base-plus-sv", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMForXVector" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2", + "3": "LABEL_3", + "4": "LABEL_4", + "5": "LABEL_5", + "6": "LABEL_6", + "7": "LABEL_7", + "8": "LABEL_8", + "9": "LABEL_9", + "10": "LABEL_10", + "11": 
"LABEL_11", + "12": "LABEL_12", + "13": "LABEL_13", + "14": "LABEL_14", + "15": "LABEL_15", + "16": "LABEL_16", + "17": "LABEL_17", + "18": "LABEL_18", + "19": "LABEL_19", + "20": "LABEL_20", + "21": "LABEL_21", + "22": "LABEL_22", + "23": "LABEL_23", + "24": "LABEL_24", + "25": "LABEL_25", + "26": "LABEL_26", + "27": "LABEL_27", + "28": "LABEL_28", + "29": "LABEL_29", + "30": "LABEL_30", + "31": "LABEL_31", + "32": "LABEL_32", + "33": "LABEL_33", + "34": "LABEL_34", + "35": "LABEL_35", + "36": "LABEL_36", + "37": "LABEL_37", + "38": "LABEL_38", + "39": "LABEL_39", + "40": "LABEL_40", + "41": "LABEL_41", + "42": "LABEL_42", + "43": "LABEL_43", + "44": "LABEL_44", + "45": "LABEL_45", + "46": "LABEL_46", + "47": "LABEL_47", + "48": "LABEL_48", + "49": "LABEL_49", + "50": "LABEL_50", + "51": "LABEL_51", + "52": "LABEL_52", + "53": "LABEL_53", + "54": "LABEL_54", + "55": "LABEL_55", + "56": "LABEL_56", + "57": "LABEL_57", + "58": "LABEL_58", + "59": "LABEL_59", + "60": "LABEL_60", + "61": "LABEL_61", + "62": "LABEL_62", + "63": "LABEL_63", + "64": "LABEL_64", + "65": "LABEL_65", + "66": "LABEL_66", + "67": "LABEL_67", + "68": "LABEL_68", + "69": "LABEL_69", + "70": "LABEL_70", + "71": "LABEL_71", + "72": "LABEL_72", + "73": "LABEL_73", + "74": "LABEL_74", + "75": "LABEL_75", + "76": "LABEL_76", + "77": "LABEL_77", + "78": "LABEL_78", + "79": "LABEL_79", + "80": "LABEL_80", + "81": "LABEL_81", + "82": "LABEL_82", + "83": "LABEL_83", + "84": "LABEL_84", + "85": "LABEL_85", + "86": "LABEL_86", + "87": "LABEL_87", + "88": "LABEL_88", + "89": "LABEL_89", + "90": "LABEL_90", + "91": "LABEL_91", + "92": "LABEL_92", + "93": "LABEL_93", + "94": "LABEL_94", + "95": "LABEL_95", + "96": "LABEL_96", + "97": "LABEL_97", + "98": "LABEL_98", + "99": "LABEL_99", + "100": "LABEL_100", + "101": "LABEL_101", + "102": "LABEL_102", + "103": "LABEL_103", + "104": "LABEL_104", + "105": "LABEL_105", + "106": "LABEL_106", + "107": "LABEL_107", + "108": "LABEL_108", + "109": "LABEL_109", + 
"110": "LABEL_110", + "111": "LABEL_111", + "112": "LABEL_112", + "113": "LABEL_113", + "114": "LABEL_114", + "115": "LABEL_115", + "116": "LABEL_116", + "117": "LABEL_117", + "118": "LABEL_118", + "119": "LABEL_119", + "120": "LABEL_120", + "121": "LABEL_121", + "122": "LABEL_122", + "123": "LABEL_123", + "124": "LABEL_124", + "125": "LABEL_125", + "126": "LABEL_126", + "127": "LABEL_127", + "128": "LABEL_128", + "129": "LABEL_129", + "130": "LABEL_130", + "131": "LABEL_131", + "132": "LABEL_132", + "133": "LABEL_133", + "134": "LABEL_134", + "135": "LABEL_135", + "136": "LABEL_136", + "137": "LABEL_137", + "138": "LABEL_138", + "139": "LABEL_139", + "140": "LABEL_140", + "141": "LABEL_141", + "142": "LABEL_142", + "143": "LABEL_143", + "144": "LABEL_144", + "145": "LABEL_145", + "146": "LABEL_146", + "147": "LABEL_147", + "148": "LABEL_148", + "149": "LABEL_149", + "150": "LABEL_150", + "151": "LABEL_151", + "152": "LABEL_152", + "153": "LABEL_153", + "154": "LABEL_154", + "155": "LABEL_155", + "156": "LABEL_156", + "157": "LABEL_157", + "158": "LABEL_158", + "159": "LABEL_159", + "160": "LABEL_160", + "161": "LABEL_161", + "162": "LABEL_162", + "163": "LABEL_163", + "164": "LABEL_164", + "165": "LABEL_165", + "166": "LABEL_166", + "167": "LABEL_167", + "168": "LABEL_168", + "169": "LABEL_169", + "170": "LABEL_170", + "171": "LABEL_171", + "172": "LABEL_172", + "173": "LABEL_173", + "174": "LABEL_174", + "175": "LABEL_175", + "176": "LABEL_176", + "177": "LABEL_177", + "178": "LABEL_178", + "179": "LABEL_179", + "180": "LABEL_180", + "181": "LABEL_181", + "182": "LABEL_182", + "183": "LABEL_183", + "184": "LABEL_184", + "185": "LABEL_185", + "186": "LABEL_186", + "187": "LABEL_187", + "188": "LABEL_188", + "189": "LABEL_189", + "190": "LABEL_190", + "191": "LABEL_191", + "192": "LABEL_192", + "193": "LABEL_193", + "194": "LABEL_194", + "195": "LABEL_195", + "196": "LABEL_196", + "197": "LABEL_197", + "198": "LABEL_198", + "199": "LABEL_199", + "200": "LABEL_200", 
+ "201": "LABEL_201", + "202": "LABEL_202", + "203": "LABEL_203", + "204": "LABEL_204", + "205": "LABEL_205", + "206": "LABEL_206", + "207": "LABEL_207", + "208": "LABEL_208", + "209": "LABEL_209", + "210": "LABEL_210", + "211": "LABEL_211", + "212": "LABEL_212", + "213": "LABEL_213", + "214": "LABEL_214", + "215": "LABEL_215", + "216": "LABEL_216", + "217": "LABEL_217", + "218": "LABEL_218", + "219": "LABEL_219", + "220": "LABEL_220", + "221": "LABEL_221", + "222": "LABEL_222", + "223": "LABEL_223", + "224": "LABEL_224", + "225": "LABEL_225", + "226": "LABEL_226", + "227": "LABEL_227", + "228": "LABEL_228", + "229": "LABEL_229", + "230": "LABEL_230", + "231": "LABEL_231", + "232": "LABEL_232", + "233": "LABEL_233", + "234": "LABEL_234", + "235": "LABEL_235", + "236": "LABEL_236", + "237": "LABEL_237", + "238": "LABEL_238", + "239": "LABEL_239", + "240": "LABEL_240", + "241": "LABEL_241", + "242": "LABEL_242", + "243": "LABEL_243", + "244": "LABEL_244", + "245": "LABEL_245", + "246": "LABEL_246", + "247": "LABEL_247", + "248": "LABEL_248", + "249": "LABEL_249", + "250": "LABEL_250", + "251": "LABEL_251", + "252": "LABEL_252", + "253": "LABEL_253", + "254": "LABEL_254", + "255": "LABEL_255", + "256": "LABEL_256", + "257": "LABEL_257", + "258": "LABEL_258", + "259": "LABEL_259", + "260": "LABEL_260", + "261": "LABEL_261", + "262": "LABEL_262", + "263": "LABEL_263", + "264": "LABEL_264", + "265": "LABEL_265", + "266": "LABEL_266", + "267": "LABEL_267", + "268": "LABEL_268", + "269": "LABEL_269", + "270": "LABEL_270", + "271": "LABEL_271", + "272": "LABEL_272", + "273": "LABEL_273", + "274": "LABEL_274", + "275": "LABEL_275", + "276": "LABEL_276", + "277": "LABEL_277", + "278": "LABEL_278", + "279": "LABEL_279", + "280": "LABEL_280", + "281": "LABEL_281", + "282": "LABEL_282", + "283": "LABEL_283", + "284": "LABEL_284", + "285": "LABEL_285", + "286": "LABEL_286", + "287": "LABEL_287", + "288": "LABEL_288", + "289": "LABEL_289", + "290": "LABEL_290", + "291": 
"LABEL_291", + "292": "LABEL_292", + "293": "LABEL_293", + "294": "LABEL_294", + "295": "LABEL_295", + "296": "LABEL_296", + "297": "LABEL_297", + "298": "LABEL_298", + "299": "LABEL_299", + "300": "LABEL_300", + "301": "LABEL_301", + "302": "LABEL_302", + "303": "LABEL_303", + "304": "LABEL_304", + "305": "LABEL_305", + "306": "LABEL_306", + "307": "LABEL_307", + "308": "LABEL_308", + "309": "LABEL_309", + "310": "LABEL_310", + "311": "LABEL_311", + "312": "LABEL_312", + "313": "LABEL_313", + "314": "LABEL_314", + "315": "LABEL_315", + "316": "LABEL_316", + "317": "LABEL_317", + "318": "LABEL_318", + "319": "LABEL_319", + "320": "LABEL_320", + "321": "LABEL_321", + "322": "LABEL_322", + "323": "LABEL_323", + "324": "LABEL_324", + "325": "LABEL_325", + "326": "LABEL_326", + "327": "LABEL_327", + "328": "LABEL_328", + "329": "LABEL_329", + "330": "LABEL_330", + "331": "LABEL_331", + "332": "LABEL_332", + "333": "LABEL_333", + "334": "LABEL_334", + "335": "LABEL_335", + "336": "LABEL_336", + "337": "LABEL_337", + "338": "LABEL_338", + "339": "LABEL_339", + "340": "LABEL_340", + "341": "LABEL_341", + "342": "LABEL_342", + "343": "LABEL_343", + "344": "LABEL_344", + "345": "LABEL_345", + "346": "LABEL_346", + "347": "LABEL_347", + "348": "LABEL_348", + "349": "LABEL_349", + "350": "LABEL_350", + "351": "LABEL_351", + "352": "LABEL_352", + "353": "LABEL_353", + "354": "LABEL_354", + "355": "LABEL_355", + "356": "LABEL_356", + "357": "LABEL_357", + "358": "LABEL_358", + "359": "LABEL_359", + "360": "LABEL_360", + "361": "LABEL_361", + "362": "LABEL_362", + "363": "LABEL_363", + "364": "LABEL_364", + "365": "LABEL_365", + "366": "LABEL_366", + "367": "LABEL_367", + "368": "LABEL_368", + "369": "LABEL_369", + "370": "LABEL_370", + "371": "LABEL_371", + "372": "LABEL_372", + "373": "LABEL_373", + "374": "LABEL_374", + "375": "LABEL_375", + "376": "LABEL_376", + "377": "LABEL_377", + "378": "LABEL_378", + "379": "LABEL_379", + "380": "LABEL_380", + "381": "LABEL_381", + 
"382": "LABEL_382", + "383": "LABEL_383", + "384": "LABEL_384", + "385": "LABEL_385", + "386": "LABEL_386", + "387": "LABEL_387", + "388": "LABEL_388", + "389": "LABEL_389", + "390": "LABEL_390", + "391": "LABEL_391", + "392": "LABEL_392", + "393": "LABEL_393", + "394": "LABEL_394", + "395": "LABEL_395", + "396": "LABEL_396", + "397": "LABEL_397", + "398": "LABEL_398", + "399": "LABEL_399", + "400": "LABEL_400", + "401": "LABEL_401", + "402": "LABEL_402", + "403": "LABEL_403", + "404": "LABEL_404", + "405": "LABEL_405", + "406": "LABEL_406", + "407": "LABEL_407", + "408": "LABEL_408", + "409": "LABEL_409", + "410": "LABEL_410", + "411": "LABEL_411", + "412": "LABEL_412", + "413": "LABEL_413", + "414": "LABEL_414", + "415": "LABEL_415", + "416": "LABEL_416", + "417": "LABEL_417", + "418": "LABEL_418", + "419": "LABEL_419", + "420": "LABEL_420", + "421": "LABEL_421", + "422": "LABEL_422", + "423": "LABEL_423", + "424": "LABEL_424", + "425": "LABEL_425", + "426": "LABEL_426", + "427": "LABEL_427", + "428": "LABEL_428", + "429": "LABEL_429", + "430": "LABEL_430", + "431": "LABEL_431", + "432": "LABEL_432", + "433": "LABEL_433", + "434": "LABEL_434", + "435": "LABEL_435", + "436": "LABEL_436", + "437": "LABEL_437", + "438": "LABEL_438", + "439": "LABEL_439", + "440": "LABEL_440", + "441": "LABEL_441", + "442": "LABEL_442", + "443": "LABEL_443", + "444": "LABEL_444", + "445": "LABEL_445", + "446": "LABEL_446", + "447": "LABEL_447", + "448": "LABEL_448", + "449": "LABEL_449", + "450": "LABEL_450", + "451": "LABEL_451", + "452": "LABEL_452", + "453": "LABEL_453", + "454": "LABEL_454", + "455": "LABEL_455", + "456": "LABEL_456", + "457": "LABEL_457", + "458": "LABEL_458", + "459": "LABEL_459", + "460": "LABEL_460", + "461": "LABEL_461", + "462": "LABEL_462", + "463": "LABEL_463", + "464": "LABEL_464", + "465": "LABEL_465", + "466": "LABEL_466", + "467": "LABEL_467", + "468": "LABEL_468", + "469": "LABEL_469", + "470": "LABEL_470", + "471": "LABEL_471", + "472": "LABEL_472", 
+ "473": "LABEL_473", + "474": "LABEL_474", + "475": "LABEL_475", + "476": "LABEL_476", + "477": "LABEL_477", + "478": "LABEL_478", + "479": "LABEL_479", + "480": "LABEL_480", + "481": "LABEL_481", + "482": "LABEL_482", + "483": "LABEL_483", + "484": "LABEL_484", + "485": "LABEL_485", + "486": "LABEL_486", + "487": "LABEL_487", + "488": "LABEL_488", + "489": "LABEL_489", + "490": "LABEL_490", + "491": "LABEL_491", + "492": "LABEL_492", + "493": "LABEL_493", + "494": "LABEL_494", + "495": "LABEL_495", + "496": "LABEL_496", + "497": "LABEL_497", + "498": "LABEL_498", + "499": "LABEL_499", + "500": "LABEL_500", + "501": "LABEL_501", + "502": "LABEL_502", + "503": "LABEL_503", + "504": "LABEL_504", + "505": "LABEL_505", + "506": "LABEL_506", + "507": "LABEL_507", + "508": "LABEL_508", + "509": "LABEL_509", + "510": "LABEL_510", + "511": "LABEL_511", + "512": "LABEL_512", + "513": "LABEL_513", + "514": "LABEL_514", + "515": "LABEL_515", + "516": "LABEL_516", + "517": "LABEL_517", + "518": "LABEL_518", + "519": "LABEL_519", + "520": "LABEL_520", + "521": "LABEL_521", + "522": "LABEL_522", + "523": "LABEL_523", + "524": "LABEL_524", + "525": "LABEL_525", + "526": "LABEL_526", + "527": "LABEL_527", + "528": "LABEL_528", + "529": "LABEL_529", + "530": "LABEL_530", + "531": "LABEL_531", + "532": "LABEL_532", + "533": "LABEL_533", + "534": "LABEL_534", + "535": "LABEL_535", + "536": "LABEL_536", + "537": "LABEL_537", + "538": "LABEL_538", + "539": "LABEL_539", + "540": "LABEL_540", + "541": "LABEL_541", + "542": "LABEL_542", + "543": "LABEL_543", + "544": "LABEL_544", + "545": "LABEL_545", + "546": "LABEL_546", + "547": "LABEL_547", + "548": "LABEL_548", + "549": "LABEL_549", + "550": "LABEL_550", + "551": "LABEL_551", + "552": "LABEL_552", + "553": "LABEL_553", + "554": "LABEL_554", + "555": "LABEL_555", + "556": "LABEL_556", + "557": "LABEL_557", + "558": "LABEL_558", + "559": "LABEL_559", + "560": "LABEL_560", + "561": "LABEL_561", + "562": "LABEL_562", + "563": 
"LABEL_563", + "564": "LABEL_564", + "565": "LABEL_565", + "566": "LABEL_566", + "567": "LABEL_567", + "568": "LABEL_568", + "569": "LABEL_569", + "570": "LABEL_570", + "571": "LABEL_571", + "572": "LABEL_572", + "573": "LABEL_573", + "574": "LABEL_574", + "575": "LABEL_575", + "576": "LABEL_576", + "577": "LABEL_577", + "578": "LABEL_578", + "579": "LABEL_579", + "580": "LABEL_580", + "581": "LABEL_581", + "582": "LABEL_582", + "583": "LABEL_583", + "584": "LABEL_584", + "585": "LABEL_585", + "586": "LABEL_586", + "587": "LABEL_587", + "588": "LABEL_588", + "589": "LABEL_589", + "590": "LABEL_590", + "591": "LABEL_591", + "592": "LABEL_592", + "593": "LABEL_593", + "594": "LABEL_594", + "595": "LABEL_595", + "596": "LABEL_596", + "597": "LABEL_597", + "598": "LABEL_598", + "599": "LABEL_599", + "600": "LABEL_600", + "601": "LABEL_601", + "602": "LABEL_602", + "603": "LABEL_603", + "604": "LABEL_604", + "605": "LABEL_605", + "606": "LABEL_606", + "607": "LABEL_607", + "608": "LABEL_608", + "609": "LABEL_609", + "610": "LABEL_610", + "611": "LABEL_611", + "612": "LABEL_612", + "613": "LABEL_613", + "614": "LABEL_614", + "615": "LABEL_615", + "616": "LABEL_616", + "617": "LABEL_617", + "618": "LABEL_618", + "619": "LABEL_619", + "620": "LABEL_620", + "621": "LABEL_621", + "622": "LABEL_622", + "623": "LABEL_623", + "624": "LABEL_624", + "625": "LABEL_625", + "626": "LABEL_626", + "627": "LABEL_627", + "628": "LABEL_628", + "629": "LABEL_629", + "630": "LABEL_630", + "631": "LABEL_631", + "632": "LABEL_632", + "633": "LABEL_633", + "634": "LABEL_634", + "635": "LABEL_635", + "636": "LABEL_636", + "637": "LABEL_637", + "638": "LABEL_638", + "639": "LABEL_639", + "640": "LABEL_640", + "641": "LABEL_641", + "642": "LABEL_642", + "643": "LABEL_643", + "644": "LABEL_644", + "645": "LABEL_645", + "646": "LABEL_646", + "647": "LABEL_647", + "648": "LABEL_648", + "649": "LABEL_649", + "650": "LABEL_650", + "651": "LABEL_651", + "652": "LABEL_652", + "653": "LABEL_653", + 
"654": "LABEL_654", + "655": "LABEL_655", + "656": "LABEL_656", + "657": "LABEL_657", + "658": "LABEL_658", + "659": "LABEL_659", + "660": "LABEL_660", + "661": "LABEL_661", + "662": "LABEL_662", + "663": "LABEL_663", + "664": "LABEL_664", + "665": "LABEL_665", + "666": "LABEL_666", + "667": "LABEL_667", + "668": "LABEL_668", + "669": "LABEL_669", + "670": "LABEL_670", + "671": "LABEL_671", + "672": "LABEL_672", + "673": "LABEL_673", + "674": "LABEL_674", + "675": "LABEL_675", + "676": "LABEL_676", + "677": "LABEL_677", + "678": "LABEL_678", + "679": "LABEL_679", + "680": "LABEL_680", + "681": "LABEL_681", + "682": "LABEL_682", + "683": "LABEL_683", + "684": "LABEL_684", + "685": "LABEL_685", + "686": "LABEL_686", + "687": "LABEL_687", + "688": "LABEL_688", + "689": "LABEL_689", + "690": "LABEL_690", + "691": "LABEL_691", + "692": "LABEL_692", + "693": "LABEL_693", + "694": "LABEL_694", + "695": "LABEL_695", + "696": "LABEL_696", + "697": "LABEL_697", + "698": "LABEL_698", + "699": "LABEL_699", + "700": "LABEL_700", + "701": "LABEL_701", + "702": "LABEL_702", + "703": "LABEL_703", + "704": "LABEL_704", + "705": "LABEL_705", + "706": "LABEL_706", + "707": "LABEL_707", + "708": "LABEL_708", + "709": "LABEL_709", + "710": "LABEL_710", + "711": "LABEL_711", + "712": "LABEL_712", + "713": "LABEL_713", + "714": "LABEL_714", + "715": "LABEL_715", + "716": "LABEL_716", + "717": "LABEL_717", + "718": "LABEL_718", + "719": "LABEL_719", + "720": "LABEL_720", + "721": "LABEL_721", + "722": "LABEL_722", + "723": "LABEL_723", + "724": "LABEL_724", + "725": "LABEL_725", + "726": "LABEL_726", + "727": "LABEL_727", + "728": "LABEL_728", + "729": "LABEL_729", + "730": "LABEL_730", + "731": "LABEL_731", + "732": "LABEL_732", + "733": "LABEL_733", + "734": "LABEL_734", + "735": "LABEL_735", + "736": "LABEL_736", + "737": "LABEL_737", + "738": "LABEL_738", + "739": "LABEL_739", + "740": "LABEL_740", + "741": "LABEL_741", + "742": "LABEL_742", + "743": "LABEL_743", + "744": "LABEL_744", 
+ "745": "LABEL_745", + "746": "LABEL_746", + "747": "LABEL_747", + "748": "LABEL_748", + "749": "LABEL_749", + "750": "LABEL_750", + "751": "LABEL_751", + "752": "LABEL_752", + "753": "LABEL_753", + "754": "LABEL_754", + "755": "LABEL_755", + "756": "LABEL_756", + "757": "LABEL_757", + "758": "LABEL_758", + "759": "LABEL_759", + "760": "LABEL_760", + "761": "LABEL_761", + "762": "LABEL_762", + "763": "LABEL_763", + "764": "LABEL_764", + "765": "LABEL_765", + "766": "LABEL_766", + "767": "LABEL_767", + "768": "LABEL_768", + "769": "LABEL_769", + "770": "LABEL_770", + "771": "LABEL_771", + "772": "LABEL_772", + "773": "LABEL_773", + "774": "LABEL_774", + "775": "LABEL_775", + "776": "LABEL_776", + "777": "LABEL_777", + "778": "LABEL_778", + "779": "LABEL_779", + "780": "LABEL_780", + "781": "LABEL_781", + "782": "LABEL_782", + "783": "LABEL_783", + "784": "LABEL_784", + "785": "LABEL_785", + "786": "LABEL_786", + "787": "LABEL_787", + "788": "LABEL_788", + "789": "LABEL_789", + "790": "LABEL_790", + "791": "LABEL_791", + "792": "LABEL_792", + "793": "LABEL_793", + "794": "LABEL_794", + "795": "LABEL_795", + "796": "LABEL_796", + "797": "LABEL_797", + "798": "LABEL_798", + "799": "LABEL_799", + "800": "LABEL_800", + "801": "LABEL_801", + "802": "LABEL_802", + "803": "LABEL_803", + "804": "LABEL_804", + "805": "LABEL_805", + "806": "LABEL_806", + "807": "LABEL_807", + "808": "LABEL_808", + "809": "LABEL_809", + "810": "LABEL_810", + "811": "LABEL_811", + "812": "LABEL_812", + "813": "LABEL_813", + "814": "LABEL_814", + "815": "LABEL_815", + "816": "LABEL_816", + "817": "LABEL_817", + "818": "LABEL_818", + "819": "LABEL_819", + "820": "LABEL_820", + "821": "LABEL_821", + "822": "LABEL_822", + "823": "LABEL_823", + "824": "LABEL_824", + "825": "LABEL_825", + "826": "LABEL_826", + "827": "LABEL_827", + "828": "LABEL_828", + "829": "LABEL_829", + "830": "LABEL_830", + "831": "LABEL_831", + "832": "LABEL_832", + "833": "LABEL_833", + "834": "LABEL_834", + "835": 
"LABEL_835", + "836": "LABEL_836", + "837": "LABEL_837", + "838": "LABEL_838", + "839": "LABEL_839", + "840": "LABEL_840", + "841": "LABEL_841", + "842": "LABEL_842", + "843": "LABEL_843", + "844": "LABEL_844", + "845": "LABEL_845", + "846": "LABEL_846", + "847": "LABEL_847", + "848": "LABEL_848", + "849": "LABEL_849", + "850": "LABEL_850", + "851": "LABEL_851", + "852": "LABEL_852", + "853": "LABEL_853", + "854": "LABEL_854", + "855": "LABEL_855", + "856": "LABEL_856", + "857": "LABEL_857", + "858": "LABEL_858", + "859": "LABEL_859", + "860": "LABEL_860", + "861": "LABEL_861", + "862": "LABEL_862", + "863": "LABEL_863", + "864": "LABEL_864", + "865": "LABEL_865", + "866": "LABEL_866", + "867": "LABEL_867", + "868": "LABEL_868", + "869": "LABEL_869", + "870": "LABEL_870", + "871": "LABEL_871", + "872": "LABEL_872", + "873": "LABEL_873", + "874": "LABEL_874", + "875": "LABEL_875", + "876": "LABEL_876", + "877": "LABEL_877", + "878": "LABEL_878", + "879": "LABEL_879", + "880": "LABEL_880", + "881": "LABEL_881", + "882": "LABEL_882", + "883": "LABEL_883", + "884": "LABEL_884", + "885": "LABEL_885", + "886": "LABEL_886", + "887": "LABEL_887", + "888": "LABEL_888", + "889": "LABEL_889", + "890": "LABEL_890", + "891": "LABEL_891", + "892": "LABEL_892", + "893": "LABEL_893", + "894": "LABEL_894", + "895": "LABEL_895", + "896": "LABEL_896", + "897": "LABEL_897", + "898": "LABEL_898", + "899": "LABEL_899", + "900": "LABEL_900", + "901": "LABEL_901", + "902": "LABEL_902", + "903": "LABEL_903", + "904": "LABEL_904", + "905": "LABEL_905", + "906": "LABEL_906", + "907": "LABEL_907", + "908": "LABEL_908", + "909": "LABEL_909", + "910": "LABEL_910", + "911": "LABEL_911", + "912": "LABEL_912", + "913": "LABEL_913", + "914": "LABEL_914", + "915": "LABEL_915", + "916": "LABEL_916", + "917": "LABEL_917", + "918": "LABEL_918", + "919": "LABEL_919", + "920": "LABEL_920", + "921": "LABEL_921", + "922": "LABEL_922", + "923": "LABEL_923", + "924": "LABEL_924", + "925": "LABEL_925", + 
"926": "LABEL_926", + "927": "LABEL_927", + "928": "LABEL_928", + "929": "LABEL_929", + "930": "LABEL_930", + "931": "LABEL_931", + "932": "LABEL_932", + "933": "LABEL_933", + "934": "LABEL_934", + "935": "LABEL_935", + "936": "LABEL_936", + "937": "LABEL_937", + "938": "LABEL_938", + "939": "LABEL_939", + "940": "LABEL_940", + "941": "LABEL_941", + "942": "LABEL_942", + "943": "LABEL_943", + "944": "LABEL_944", + "945": "LABEL_945", + "946": "LABEL_946", + "947": "LABEL_947", + "948": "LABEL_948", + "949": "LABEL_949", + "950": "LABEL_950", + "951": "LABEL_951", + "952": "LABEL_952", + "953": "LABEL_953", + "954": "LABEL_954", + "955": "LABEL_955", + "956": "LABEL_956", + "957": "LABEL_957", + "958": "LABEL_958", + "959": "LABEL_959", + "960": "LABEL_960", + "961": "LABEL_961", + "962": "LABEL_962", + "963": "LABEL_963", + "964": "LABEL_964", + "965": "LABEL_965", + "966": "LABEL_966", + "967": "LABEL_967", + "968": "LABEL_968", + "969": "LABEL_969", + "970": "LABEL_970", + "971": "LABEL_971", + "972": "LABEL_972", + "973": "LABEL_973", + "974": "LABEL_974", + "975": "LABEL_975", + "976": "LABEL_976", + "977": "LABEL_977", + "978": "LABEL_978", + "979": "LABEL_979", + "980": "LABEL_980", + "981": "LABEL_981", + "982": "LABEL_982", + "983": "LABEL_983", + "984": "LABEL_984", + "985": "LABEL_985", + "986": "LABEL_986", + "987": "LABEL_987", + "988": "LABEL_988", + "989": "LABEL_989", + "990": "LABEL_990", + "991": "LABEL_991", + "992": "LABEL_992", + "993": "LABEL_993", + "994": "LABEL_994", + "995": "LABEL_995", + "996": "LABEL_996", + "997": "LABEL_997", + "998": "LABEL_998", + "999": "LABEL_999", + "1000": "LABEL_1000", + "1001": "LABEL_1001", + "1002": "LABEL_1002", + "1003": "LABEL_1003", + "1004": "LABEL_1004", + "1005": "LABEL_1005", + "1006": "LABEL_1006", + "1007": "LABEL_1007", + "1008": "LABEL_1008", + "1009": "LABEL_1009", + "1010": "LABEL_1010", + "1011": "LABEL_1011", + "1012": "LABEL_1012", + "1013": "LABEL_1013", + "1014": "LABEL_1014", + "1015": 
"LABEL_1015", + "1016": "LABEL_1016", + "1017": "LABEL_1017", + "1018": "LABEL_1018", + "1019": "LABEL_1019", + "1020": "LABEL_1020", + "1021": "LABEL_1021", + "1022": "LABEL_1022", + "1023": "LABEL_1023", + "1024": "LABEL_1024", + "1025": "LABEL_1025", + "1026": "LABEL_1026", + "1027": "LABEL_1027", + "1028": "LABEL_1028", + "1029": "LABEL_1029", + "1030": "LABEL_1030", + "1031": "LABEL_1031", + "1032": "LABEL_1032", + "1033": "LABEL_1033", + "1034": "LABEL_1034", + "1035": "LABEL_1035", + "1036": "LABEL_1036", + "1037": "LABEL_1037", + "1038": "LABEL_1038", + "1039": "LABEL_1039", + "1040": "LABEL_1040", + "1041": "LABEL_1041", + "1042": "LABEL_1042", + "1043": "LABEL_1043", + "1044": "LABEL_1044", + "1045": "LABEL_1045", + "1046": "LABEL_1046", + "1047": "LABEL_1047", + "1048": "LABEL_1048", + "1049": "LABEL_1049", + "1050": "LABEL_1050", + "1051": "LABEL_1051", + "1052": "LABEL_1052", + "1053": "LABEL_1053", + "1054": "LABEL_1054", + "1055": "LABEL_1055", + "1056": "LABEL_1056", + "1057": "LABEL_1057", + "1058": "LABEL_1058", + "1059": "LABEL_1059", + "1060": "LABEL_1060", + "1061": "LABEL_1061", + "1062": "LABEL_1062", + "1063": "LABEL_1063", + "1064": "LABEL_1064", + "1065": "LABEL_1065", + "1066": "LABEL_1066", + "1067": "LABEL_1067", + "1068": "LABEL_1068", + "1069": "LABEL_1069", + "1070": "LABEL_1070", + "1071": "LABEL_1071", + "1072": "LABEL_1072", + "1073": "LABEL_1073", + "1074": "LABEL_1074", + "1075": "LABEL_1075", + "1076": "LABEL_1076", + "1077": "LABEL_1077", + "1078": "LABEL_1078", + "1079": "LABEL_1079", + "1080": "LABEL_1080", + "1081": "LABEL_1081", + "1082": "LABEL_1082", + "1083": "LABEL_1083", + "1084": "LABEL_1084", + "1085": "LABEL_1085", + "1086": "LABEL_1086", + "1087": "LABEL_1087", + "1088": "LABEL_1088", + "1089": "LABEL_1089", + "1090": "LABEL_1090", + "1091": "LABEL_1091", + "1092": "LABEL_1092", + "1093": "LABEL_1093", + "1094": "LABEL_1094", + "1095": "LABEL_1095", + "1096": "LABEL_1096", + "1097": "LABEL_1097", + "1098": 
"LABEL_1098", + "1099": "LABEL_1099", + "1100": "LABEL_1100", + "1101": "LABEL_1101", + "1102": "LABEL_1102", + "1103": "LABEL_1103", + "1104": "LABEL_1104", + "1105": "LABEL_1105", + "1106": "LABEL_1106", + "1107": "LABEL_1107", + "1108": "LABEL_1108", + "1109": "LABEL_1109", + "1110": "LABEL_1110", + "1111": "LABEL_1111", + "1112": "LABEL_1112", + "1113": "LABEL_1113", + "1114": "LABEL_1114", + "1115": "LABEL_1115", + "1116": "LABEL_1116", + "1117": "LABEL_1117", + "1118": "LABEL_1118", + "1119": "LABEL_1119", + "1120": "LABEL_1120", + "1121": "LABEL_1121", + "1122": "LABEL_1122", + "1123": "LABEL_1123", + "1124": "LABEL_1124", + "1125": "LABEL_1125", + "1126": "LABEL_1126", + "1127": "LABEL_1127", + "1128": "LABEL_1128", + "1129": "LABEL_1129", + "1130": "LABEL_1130", + "1131": "LABEL_1131", + "1132": "LABEL_1132", + "1133": "LABEL_1133", + "1134": "LABEL_1134", + "1135": "LABEL_1135", + "1136": "LABEL_1136", + "1137": "LABEL_1137", + "1138": "LABEL_1138", + "1139": "LABEL_1139", + "1140": "LABEL_1140", + "1141": "LABEL_1141", + "1142": "LABEL_1142", + "1143": "LABEL_1143", + "1144": "LABEL_1144", + "1145": "LABEL_1145", + "1146": "LABEL_1146", + "1147": "LABEL_1147", + "1148": "LABEL_1148", + "1149": "LABEL_1149", + "1150": "LABEL_1150", + "1151": "LABEL_1151", + "1152": "LABEL_1152", + "1153": "LABEL_1153", + "1154": "LABEL_1154", + "1155": "LABEL_1155", + "1156": "LABEL_1156", + "1157": "LABEL_1157", + "1158": "LABEL_1158", + "1159": "LABEL_1159", + "1160": "LABEL_1160", + "1161": "LABEL_1161", + "1162": "LABEL_1162", + "1163": "LABEL_1163", + "1164": "LABEL_1164", + "1165": "LABEL_1165", + "1166": "LABEL_1166", + "1167": "LABEL_1167", + "1168": "LABEL_1168", + "1169": "LABEL_1169", + "1170": "LABEL_1170", + "1171": "LABEL_1171", + "1172": "LABEL_1172", + "1173": "LABEL_1173", + "1174": "LABEL_1174", + "1175": "LABEL_1175", + "1176": "LABEL_1176", + "1177": "LABEL_1177", + "1178": "LABEL_1178", + "1179": "LABEL_1179", + "1180": "LABEL_1180", + "1181": 
"LABEL_1181", + "1182": "LABEL_1182", + "1183": "LABEL_1183", + "1184": "LABEL_1184", + "1185": "LABEL_1185", + "1186": "LABEL_1186", + "1187": "LABEL_1187", + "1188": "LABEL_1188", + "1189": "LABEL_1189", + "1190": "LABEL_1190", + "1191": "LABEL_1191", + "1192": "LABEL_1192", + "1193": "LABEL_1193", + "1194": "LABEL_1194", + "1195": "LABEL_1195", + "1196": "LABEL_1196", + "1197": "LABEL_1197", + "1198": "LABEL_1198", + "1199": "LABEL_1199", + "1200": "LABEL_1200", + "1201": "LABEL_1201", + "1202": "LABEL_1202", + "1203": "LABEL_1203", + "1204": "LABEL_1204", + "1205": "LABEL_1205", + "1206": "LABEL_1206", + "1207": "LABEL_1207", + "1208": "LABEL_1208", + "1209": "LABEL_1209", + "1210": "LABEL_1210" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_10": 10, + "LABEL_100": 100, + "LABEL_1000": 1000, + "LABEL_1001": 1001, + "LABEL_1002": 1002, + "LABEL_1003": 1003, + "LABEL_1004": 1004, + "LABEL_1005": 1005, + "LABEL_1006": 1006, + "LABEL_1007": 1007, + "LABEL_1008": 1008, + "LABEL_1009": 1009, + "LABEL_101": 101, + "LABEL_1010": 1010, + "LABEL_1011": 1011, + "LABEL_1012": 1012, + "LABEL_1013": 1013, + "LABEL_1014": 1014, + "LABEL_1015": 1015, + "LABEL_1016": 1016, + "LABEL_1017": 1017, + "LABEL_1018": 1018, + "LABEL_1019": 1019, + "LABEL_102": 102, + "LABEL_1020": 1020, + "LABEL_1021": 1021, + "LABEL_1022": 1022, + "LABEL_1023": 1023, + "LABEL_1024": 1024, + "LABEL_1025": 1025, + "LABEL_1026": 1026, + "LABEL_1027": 1027, + "LABEL_1028": 1028, + "LABEL_1029": 1029, + "LABEL_103": 103, + "LABEL_1030": 1030, + "LABEL_1031": 1031, + "LABEL_1032": 1032, + "LABEL_1033": 1033, + "LABEL_1034": 1034, + "LABEL_1035": 1035, + "LABEL_1036": 1036, + "LABEL_1037": 1037, + "LABEL_1038": 1038, + "LABEL_1039": 1039, + "LABEL_104": 104, + "LABEL_1040": 1040, + "LABEL_1041": 1041, + "LABEL_1042": 1042, + "LABEL_1043": 1043, + "LABEL_1044": 1044, + "LABEL_1045": 1045, + "LABEL_1046": 1046, + "LABEL_1047": 1047, + 
"LABEL_1048": 1048, + "LABEL_1049": 1049, + "LABEL_105": 105, + "LABEL_1050": 1050, + "LABEL_1051": 1051, + "LABEL_1052": 1052, + "LABEL_1053": 1053, + "LABEL_1054": 1054, + "LABEL_1055": 1055, + "LABEL_1056": 1056, + "LABEL_1057": 1057, + "LABEL_1058": 1058, + "LABEL_1059": 1059, + "LABEL_106": 106, + "LABEL_1060": 1060, + "LABEL_1061": 1061, + "LABEL_1062": 1062, + "LABEL_1063": 1063, + "LABEL_1064": 1064, + "LABEL_1065": 1065, + "LABEL_1066": 1066, + "LABEL_1067": 1067, + "LABEL_1068": 1068, + "LABEL_1069": 1069, + "LABEL_107": 107, + "LABEL_1070": 1070, + "LABEL_1071": 1071, + "LABEL_1072": 1072, + "LABEL_1073": 1073, + "LABEL_1074": 1074, + "LABEL_1075": 1075, + "LABEL_1076": 1076, + "LABEL_1077": 1077, + "LABEL_1078": 1078, + "LABEL_1079": 1079, + "LABEL_108": 108, + "LABEL_1080": 1080, + "LABEL_1081": 1081, + "LABEL_1082": 1082, + "LABEL_1083": 1083, + "LABEL_1084": 1084, + "LABEL_1085": 1085, + "LABEL_1086": 1086, + "LABEL_1087": 1087, + "LABEL_1088": 1088, + "LABEL_1089": 1089, + "LABEL_109": 109, + "LABEL_1090": 1090, + "LABEL_1091": 1091, + "LABEL_1092": 1092, + "LABEL_1093": 1093, + "LABEL_1094": 1094, + "LABEL_1095": 1095, + "LABEL_1096": 1096, + "LABEL_1097": 1097, + "LABEL_1098": 1098, + "LABEL_1099": 1099, + "LABEL_11": 11, + "LABEL_110": 110, + "LABEL_1100": 1100, + "LABEL_1101": 1101, + "LABEL_1102": 1102, + "LABEL_1103": 1103, + "LABEL_1104": 1104, + "LABEL_1105": 1105, + "LABEL_1106": 1106, + "LABEL_1107": 1107, + "LABEL_1108": 1108, + "LABEL_1109": 1109, + "LABEL_111": 111, + "LABEL_1110": 1110, + "LABEL_1111": 1111, + "LABEL_1112": 1112, + "LABEL_1113": 1113, + "LABEL_1114": 1114, + "LABEL_1115": 1115, + "LABEL_1116": 1116, + "LABEL_1117": 1117, + "LABEL_1118": 1118, + "LABEL_1119": 1119, + "LABEL_112": 112, + "LABEL_1120": 1120, + "LABEL_1121": 1121, + "LABEL_1122": 1122, + "LABEL_1123": 1123, + "LABEL_1124": 1124, + "LABEL_1125": 1125, + "LABEL_1126": 1126, + "LABEL_1127": 1127, + "LABEL_1128": 1128, + "LABEL_1129": 1129, + "LABEL_113": 113, 
+ "LABEL_1130": 1130, + "LABEL_1131": 1131, + "LABEL_1132": 1132, + "LABEL_1133": 1133, + "LABEL_1134": 1134, + "LABEL_1135": 1135, + "LABEL_1136": 1136, + "LABEL_1137": 1137, + "LABEL_1138": 1138, + "LABEL_1139": 1139, + "LABEL_114": 114, + "LABEL_1140": 1140, + "LABEL_1141": 1141, + "LABEL_1142": 1142, + "LABEL_1143": 1143, + "LABEL_1144": 1144, + "LABEL_1145": 1145, + "LABEL_1146": 1146, + "LABEL_1147": 1147, + "LABEL_1148": 1148, + "LABEL_1149": 1149, + "LABEL_115": 115, + "LABEL_1150": 1150, + "LABEL_1151": 1151, + "LABEL_1152": 1152, + "LABEL_1153": 1153, + "LABEL_1154": 1154, + "LABEL_1155": 1155, + "LABEL_1156": 1156, + "LABEL_1157": 1157, + "LABEL_1158": 1158, + "LABEL_1159": 1159, + "LABEL_116": 116, + "LABEL_1160": 1160, + "LABEL_1161": 1161, + "LABEL_1162": 1162, + "LABEL_1163": 1163, + "LABEL_1164": 1164, + "LABEL_1165": 1165, + "LABEL_1166": 1166, + "LABEL_1167": 1167, + "LABEL_1168": 1168, + "LABEL_1169": 1169, + "LABEL_117": 117, + "LABEL_1170": 1170, + "LABEL_1171": 1171, + "LABEL_1172": 1172, + "LABEL_1173": 1173, + "LABEL_1174": 1174, + "LABEL_1175": 1175, + "LABEL_1176": 1176, + "LABEL_1177": 1177, + "LABEL_1178": 1178, + "LABEL_1179": 1179, + "LABEL_118": 118, + "LABEL_1180": 1180, + "LABEL_1181": 1181, + "LABEL_1182": 1182, + "LABEL_1183": 1183, + "LABEL_1184": 1184, + "LABEL_1185": 1185, + "LABEL_1186": 1186, + "LABEL_1187": 1187, + "LABEL_1188": 1188, + "LABEL_1189": 1189, + "LABEL_119": 119, + "LABEL_1190": 1190, + "LABEL_1191": 1191, + "LABEL_1192": 1192, + "LABEL_1193": 1193, + "LABEL_1194": 1194, + "LABEL_1195": 1195, + "LABEL_1196": 1196, + "LABEL_1197": 1197, + "LABEL_1198": 1198, + "LABEL_1199": 1199, + "LABEL_12": 12, + "LABEL_120": 120, + "LABEL_1200": 1200, + "LABEL_1201": 1201, + "LABEL_1202": 1202, + "LABEL_1203": 1203, + "LABEL_1204": 1204, + "LABEL_1205": 1205, + "LABEL_1206": 1206, + "LABEL_1207": 1207, + "LABEL_1208": 1208, + "LABEL_1209": 1209, + "LABEL_121": 121, + "LABEL_1210": 1210, + "LABEL_122": 122, + "LABEL_123": 123, 
+ "LABEL_124": 124, + "LABEL_125": 125, + "LABEL_126": 126, + "LABEL_127": 127, + "LABEL_128": 128, + "LABEL_129": 129, + "LABEL_13": 13, + "LABEL_130": 130, + "LABEL_131": 131, + "LABEL_132": 132, + "LABEL_133": 133, + "LABEL_134": 134, + "LABEL_135": 135, + "LABEL_136": 136, + "LABEL_137": 137, + "LABEL_138": 138, + "LABEL_139": 139, + "LABEL_14": 14, + "LABEL_140": 140, + "LABEL_141": 141, + "LABEL_142": 142, + "LABEL_143": 143, + "LABEL_144": 144, + "LABEL_145": 145, + "LABEL_146": 146, + "LABEL_147": 147, + "LABEL_148": 148, + "LABEL_149": 149, + "LABEL_15": 15, + "LABEL_150": 150, + "LABEL_151": 151, + "LABEL_152": 152, + "LABEL_153": 153, + "LABEL_154": 154, + "LABEL_155": 155, + "LABEL_156": 156, + "LABEL_157": 157, + "LABEL_158": 158, + "LABEL_159": 159, + "LABEL_16": 16, + "LABEL_160": 160, + "LABEL_161": 161, + "LABEL_162": 162, + "LABEL_163": 163, + "LABEL_164": 164, + "LABEL_165": 165, + "LABEL_166": 166, + "LABEL_167": 167, + "LABEL_168": 168, + "LABEL_169": 169, + "LABEL_17": 17, + "LABEL_170": 170, + "LABEL_171": 171, + "LABEL_172": 172, + "LABEL_173": 173, + "LABEL_174": 174, + "LABEL_175": 175, + "LABEL_176": 176, + "LABEL_177": 177, + "LABEL_178": 178, + "LABEL_179": 179, + "LABEL_18": 18, + "LABEL_180": 180, + "LABEL_181": 181, + "LABEL_182": 182, + "LABEL_183": 183, + "LABEL_184": 184, + "LABEL_185": 185, + "LABEL_186": 186, + "LABEL_187": 187, + "LABEL_188": 188, + "LABEL_189": 189, + "LABEL_19": 19, + "LABEL_190": 190, + "LABEL_191": 191, + "LABEL_192": 192, + "LABEL_193": 193, + "LABEL_194": 194, + "LABEL_195": 195, + "LABEL_196": 196, + "LABEL_197": 197, + "LABEL_198": 198, + "LABEL_199": 199, + "LABEL_2": 2, + "LABEL_20": 20, + "LABEL_200": 200, + "LABEL_201": 201, + "LABEL_202": 202, + "LABEL_203": 203, + "LABEL_204": 204, + "LABEL_205": 205, + "LABEL_206": 206, + "LABEL_207": 207, + "LABEL_208": 208, + "LABEL_209": 209, + "LABEL_21": 21, + "LABEL_210": 210, + "LABEL_211": 211, + "LABEL_212": 212, + "LABEL_213": 213, + "LABEL_214": 214, + 
"LABEL_215": 215, + "LABEL_216": 216, + "LABEL_217": 217, + "LABEL_218": 218, + "LABEL_219": 219, + "LABEL_22": 22, + "LABEL_220": 220, + "LABEL_221": 221, + "LABEL_222": 222, + "LABEL_223": 223, + "LABEL_224": 224, + "LABEL_225": 225, + "LABEL_226": 226, + "LABEL_227": 227, + "LABEL_228": 228, + "LABEL_229": 229, + "LABEL_23": 23, + "LABEL_230": 230, + "LABEL_231": 231, + "LABEL_232": 232, + "LABEL_233": 233, + "LABEL_234": 234, + "LABEL_235": 235, + "LABEL_236": 236, + "LABEL_237": 237, + "LABEL_238": 238, + "LABEL_239": 239, + "LABEL_24": 24, + "LABEL_240": 240, + "LABEL_241": 241, + "LABEL_242": 242, + "LABEL_243": 243, + "LABEL_244": 244, + "LABEL_245": 245, + "LABEL_246": 246, + "LABEL_247": 247, + "LABEL_248": 248, + "LABEL_249": 249, + "LABEL_25": 25, + "LABEL_250": 250, + "LABEL_251": 251, + "LABEL_252": 252, + "LABEL_253": 253, + "LABEL_254": 254, + "LABEL_255": 255, + "LABEL_256": 256, + "LABEL_257": 257, + "LABEL_258": 258, + "LABEL_259": 259, + "LABEL_26": 26, + "LABEL_260": 260, + "LABEL_261": 261, + "LABEL_262": 262, + "LABEL_263": 263, + "LABEL_264": 264, + "LABEL_265": 265, + "LABEL_266": 266, + "LABEL_267": 267, + "LABEL_268": 268, + "LABEL_269": 269, + "LABEL_27": 27, + "LABEL_270": 270, + "LABEL_271": 271, + "LABEL_272": 272, + "LABEL_273": 273, + "LABEL_274": 274, + "LABEL_275": 275, + "LABEL_276": 276, + "LABEL_277": 277, + "LABEL_278": 278, + "LABEL_279": 279, + "LABEL_28": 28, + "LABEL_280": 280, + "LABEL_281": 281, + "LABEL_282": 282, + "LABEL_283": 283, + "LABEL_284": 284, + "LABEL_285": 285, + "LABEL_286": 286, + "LABEL_287": 287, + "LABEL_288": 288, + "LABEL_289": 289, + "LABEL_29": 29, + "LABEL_290": 290, + "LABEL_291": 291, + "LABEL_292": 292, + "LABEL_293": 293, + "LABEL_294": 294, + "LABEL_295": 295, + "LABEL_296": 296, + "LABEL_297": 297, + "LABEL_298": 298, + "LABEL_299": 299, + "LABEL_3": 3, + "LABEL_30": 30, + "LABEL_300": 300, + "LABEL_301": 301, + "LABEL_302": 302, + "LABEL_303": 303, + "LABEL_304": 304, + "LABEL_305": 305, + 
"LABEL_306": 306, + "LABEL_307": 307, + "LABEL_308": 308, + "LABEL_309": 309, + "LABEL_31": 31, + "LABEL_310": 310, + "LABEL_311": 311, + "LABEL_312": 312, + "LABEL_313": 313, + "LABEL_314": 314, + "LABEL_315": 315, + "LABEL_316": 316, + "LABEL_317": 317, + "LABEL_318": 318, + "LABEL_319": 319, + "LABEL_32": 32, + "LABEL_320": 320, + "LABEL_321": 321, + "LABEL_322": 322, + "LABEL_323": 323, + "LABEL_324": 324, + "LABEL_325": 325, + "LABEL_326": 326, + "LABEL_327": 327, + "LABEL_328": 328, + "LABEL_329": 329, + "LABEL_33": 33, + "LABEL_330": 330, + "LABEL_331": 331, + "LABEL_332": 332, + "LABEL_333": 333, + "LABEL_334": 334, + "LABEL_335": 335, + "LABEL_336": 336, + "LABEL_337": 337, + "LABEL_338": 338, + "LABEL_339": 339, + "LABEL_34": 34, + "LABEL_340": 340, + "LABEL_341": 341, + "LABEL_342": 342, + "LABEL_343": 343, + "LABEL_344": 344, + "LABEL_345": 345, + "LABEL_346": 346, + "LABEL_347": 347, + "LABEL_348": 348, + "LABEL_349": 349, + "LABEL_35": 35, + "LABEL_350": 350, + "LABEL_351": 351, + "LABEL_352": 352, + "LABEL_353": 353, + "LABEL_354": 354, + "LABEL_355": 355, + "LABEL_356": 356, + "LABEL_357": 357, + "LABEL_358": 358, + "LABEL_359": 359, + "LABEL_36": 36, + "LABEL_360": 360, + "LABEL_361": 361, + "LABEL_362": 362, + "LABEL_363": 363, + "LABEL_364": 364, + "LABEL_365": 365, + "LABEL_366": 366, + "LABEL_367": 367, + "LABEL_368": 368, + "LABEL_369": 369, + "LABEL_37": 37, + "LABEL_370": 370, + "LABEL_371": 371, + "LABEL_372": 372, + "LABEL_373": 373, + "LABEL_374": 374, + "LABEL_375": 375, + "LABEL_376": 376, + "LABEL_377": 377, + "LABEL_378": 378, + "LABEL_379": 379, + "LABEL_38": 38, + "LABEL_380": 380, + "LABEL_381": 381, + "LABEL_382": 382, + "LABEL_383": 383, + "LABEL_384": 384, + "LABEL_385": 385, + "LABEL_386": 386, + "LABEL_387": 387, + "LABEL_388": 388, + "LABEL_389": 389, + "LABEL_39": 39, + "LABEL_390": 390, + "LABEL_391": 391, + "LABEL_392": 392, + "LABEL_393": 393, + "LABEL_394": 394, + "LABEL_395": 395, + "LABEL_396": 396, + "LABEL_397": 397, 
+ "LABEL_398": 398, + "LABEL_399": 399, + "LABEL_4": 4, + "LABEL_40": 40, + "LABEL_400": 400, + "LABEL_401": 401, + "LABEL_402": 402, + "LABEL_403": 403, + "LABEL_404": 404, + "LABEL_405": 405, + "LABEL_406": 406, + "LABEL_407": 407, + "LABEL_408": 408, + "LABEL_409": 409, + "LABEL_41": 41, + "LABEL_410": 410, + "LABEL_411": 411, + "LABEL_412": 412, + "LABEL_413": 413, + "LABEL_414": 414, + "LABEL_415": 415, + "LABEL_416": 416, + "LABEL_417": 417, + "LABEL_418": 418, + "LABEL_419": 419, + "LABEL_42": 42, + "LABEL_420": 420, + "LABEL_421": 421, + "LABEL_422": 422, + "LABEL_423": 423, + "LABEL_424": 424, + "LABEL_425": 425, + "LABEL_426": 426, + "LABEL_427": 427, + "LABEL_428": 428, + "LABEL_429": 429, + "LABEL_43": 43, + "LABEL_430": 430, + "LABEL_431": 431, + "LABEL_432": 432, + "LABEL_433": 433, + "LABEL_434": 434, + "LABEL_435": 435, + "LABEL_436": 436, + "LABEL_437": 437, + "LABEL_438": 438, + "LABEL_439": 439, + "LABEL_44": 44, + "LABEL_440": 440, + "LABEL_441": 441, + "LABEL_442": 442, + "LABEL_443": 443, + "LABEL_444": 444, + "LABEL_445": 445, + "LABEL_446": 446, + "LABEL_447": 447, + "LABEL_448": 448, + "LABEL_449": 449, + "LABEL_45": 45, + "LABEL_450": 450, + "LABEL_451": 451, + "LABEL_452": 452, + "LABEL_453": 453, + "LABEL_454": 454, + "LABEL_455": 455, + "LABEL_456": 456, + "LABEL_457": 457, + "LABEL_458": 458, + "LABEL_459": 459, + "LABEL_46": 46, + "LABEL_460": 460, + "LABEL_461": 461, + "LABEL_462": 462, + "LABEL_463": 463, + "LABEL_464": 464, + "LABEL_465": 465, + "LABEL_466": 466, + "LABEL_467": 467, + "LABEL_468": 468, + "LABEL_469": 469, + "LABEL_47": 47, + "LABEL_470": 470, + "LABEL_471": 471, + "LABEL_472": 472, + "LABEL_473": 473, + "LABEL_474": 474, + "LABEL_475": 475, + "LABEL_476": 476, + "LABEL_477": 477, + "LABEL_478": 478, + "LABEL_479": 479, + "LABEL_48": 48, + "LABEL_480": 480, + "LABEL_481": 481, + "LABEL_482": 482, + "LABEL_483": 483, + "LABEL_484": 484, + "LABEL_485": 485, + "LABEL_486": 486, + "LABEL_487": 487, + "LABEL_488": 488, + 
"LABEL_489": 489, + "LABEL_49": 49, + "LABEL_490": 490, + "LABEL_491": 491, + "LABEL_492": 492, + "LABEL_493": 493, + "LABEL_494": 494, + "LABEL_495": 495, + "LABEL_496": 496, + "LABEL_497": 497, + "LABEL_498": 498, + "LABEL_499": 499, + "LABEL_5": 5, + "LABEL_50": 50, + "LABEL_500": 500, + "LABEL_501": 501, + "LABEL_502": 502, + "LABEL_503": 503, + "LABEL_504": 504, + "LABEL_505": 505, + "LABEL_506": 506, + "LABEL_507": 507, + "LABEL_508": 508, + "LABEL_509": 509, + "LABEL_51": 51, + "LABEL_510": 510, + "LABEL_511": 511, + "LABEL_512": 512, + "LABEL_513": 513, + "LABEL_514": 514, + "LABEL_515": 515, + "LABEL_516": 516, + "LABEL_517": 517, + "LABEL_518": 518, + "LABEL_519": 519, + "LABEL_52": 52, + "LABEL_520": 520, + "LABEL_521": 521, + "LABEL_522": 522, + "LABEL_523": 523, + "LABEL_524": 524, + "LABEL_525": 525, + "LABEL_526": 526, + "LABEL_527": 527, + "LABEL_528": 528, + "LABEL_529": 529, + "LABEL_53": 53, + "LABEL_530": 530, + "LABEL_531": 531, + "LABEL_532": 532, + "LABEL_533": 533, + "LABEL_534": 534, + "LABEL_535": 535, + "LABEL_536": 536, + "LABEL_537": 537, + "LABEL_538": 538, + "LABEL_539": 539, + "LABEL_54": 54, + "LABEL_540": 540, + "LABEL_541": 541, + "LABEL_542": 542, + "LABEL_543": 543, + "LABEL_544": 544, + "LABEL_545": 545, + "LABEL_546": 546, + "LABEL_547": 547, + "LABEL_548": 548, + "LABEL_549": 549, + "LABEL_55": 55, + "LABEL_550": 550, + "LABEL_551": 551, + "LABEL_552": 552, + "LABEL_553": 553, + "LABEL_554": 554, + "LABEL_555": 555, + "LABEL_556": 556, + "LABEL_557": 557, + "LABEL_558": 558, + "LABEL_559": 559, + "LABEL_56": 56, + "LABEL_560": 560, + "LABEL_561": 561, + "LABEL_562": 562, + "LABEL_563": 563, + "LABEL_564": 564, + "LABEL_565": 565, + "LABEL_566": 566, + "LABEL_567": 567, + "LABEL_568": 568, + "LABEL_569": 569, + "LABEL_57": 57, + "LABEL_570": 570, + "LABEL_571": 571, + "LABEL_572": 572, + "LABEL_573": 573, + "LABEL_574": 574, + "LABEL_575": 575, + "LABEL_576": 576, + "LABEL_577": 577, + "LABEL_578": 578, + "LABEL_579": 579, + 
"LABEL_58": 58, + "LABEL_580": 580, + "LABEL_581": 581, + "LABEL_582": 582, + "LABEL_583": 583, + "LABEL_584": 584, + "LABEL_585": 585, + "LABEL_586": 586, + "LABEL_587": 587, + "LABEL_588": 588, + "LABEL_589": 589, + "LABEL_59": 59, + "LABEL_590": 590, + "LABEL_591": 591, + "LABEL_592": 592, + "LABEL_593": 593, + "LABEL_594": 594, + "LABEL_595": 595, + "LABEL_596": 596, + "LABEL_597": 597, + "LABEL_598": 598, + "LABEL_599": 599, + "LABEL_6": 6, + "LABEL_60": 60, + "LABEL_600": 600, + "LABEL_601": 601, + "LABEL_602": 602, + "LABEL_603": 603, + "LABEL_604": 604, + "LABEL_605": 605, + "LABEL_606": 606, + "LABEL_607": 607, + "LABEL_608": 608, + "LABEL_609": 609, + "LABEL_61": 61, + "LABEL_610": 610, + "LABEL_611": 611, + "LABEL_612": 612, + "LABEL_613": 613, + "LABEL_614": 614, + "LABEL_615": 615, + "LABEL_616": 616, + "LABEL_617": 617, + "LABEL_618": 618, + "LABEL_619": 619, + "LABEL_62": 62, + "LABEL_620": 620, + "LABEL_621": 621, + "LABEL_622": 622, + "LABEL_623": 623, + "LABEL_624": 624, + "LABEL_625": 625, + "LABEL_626": 626, + "LABEL_627": 627, + "LABEL_628": 628, + "LABEL_629": 629, + "LABEL_63": 63, + "LABEL_630": 630, + "LABEL_631": 631, + "LABEL_632": 632, + "LABEL_633": 633, + "LABEL_634": 634, + "LABEL_635": 635, + "LABEL_636": 636, + "LABEL_637": 637, + "LABEL_638": 638, + "LABEL_639": 639, + "LABEL_64": 64, + "LABEL_640": 640, + "LABEL_641": 641, + "LABEL_642": 642, + "LABEL_643": 643, + "LABEL_644": 644, + "LABEL_645": 645, + "LABEL_646": 646, + "LABEL_647": 647, + "LABEL_648": 648, + "LABEL_649": 649, + "LABEL_65": 65, + "LABEL_650": 650, + "LABEL_651": 651, + "LABEL_652": 652, + "LABEL_653": 653, + "LABEL_654": 654, + "LABEL_655": 655, + "LABEL_656": 656, + "LABEL_657": 657, + "LABEL_658": 658, + "LABEL_659": 659, + "LABEL_66": 66, + "LABEL_660": 660, + "LABEL_661": 661, + "LABEL_662": 662, + "LABEL_663": 663, + "LABEL_664": 664, + "LABEL_665": 665, + "LABEL_666": 666, + "LABEL_667": 667, + "LABEL_668": 668, + "LABEL_669": 669, + "LABEL_67": 67, + 
"LABEL_670": 670, + "LABEL_671": 671, + "LABEL_672": 672, + "LABEL_673": 673, + "LABEL_674": 674, + "LABEL_675": 675, + "LABEL_676": 676, + "LABEL_677": 677, + "LABEL_678": 678, + "LABEL_679": 679, + "LABEL_68": 68, + "LABEL_680": 680, + "LABEL_681": 681, + "LABEL_682": 682, + "LABEL_683": 683, + "LABEL_684": 684, + "LABEL_685": 685, + "LABEL_686": 686, + "LABEL_687": 687, + "LABEL_688": 688, + "LABEL_689": 689, + "LABEL_69": 69, + "LABEL_690": 690, + "LABEL_691": 691, + "LABEL_692": 692, + "LABEL_693": 693, + "LABEL_694": 694, + "LABEL_695": 695, + "LABEL_696": 696, + "LABEL_697": 697, + "LABEL_698": 698, + "LABEL_699": 699, + "LABEL_7": 7, + "LABEL_70": 70, + "LABEL_700": 700, + "LABEL_701": 701, + "LABEL_702": 702, + "LABEL_703": 703, + "LABEL_704": 704, + "LABEL_705": 705, + "LABEL_706": 706, + "LABEL_707": 707, + "LABEL_708": 708, + "LABEL_709": 709, + "LABEL_71": 71, + "LABEL_710": 710, + "LABEL_711": 711, + "LABEL_712": 712, + "LABEL_713": 713, + "LABEL_714": 714, + "LABEL_715": 715, + "LABEL_716": 716, + "LABEL_717": 717, + "LABEL_718": 718, + "LABEL_719": 719, + "LABEL_72": 72, + "LABEL_720": 720, + "LABEL_721": 721, + "LABEL_722": 722, + "LABEL_723": 723, + "LABEL_724": 724, + "LABEL_725": 725, + "LABEL_726": 726, + "LABEL_727": 727, + "LABEL_728": 728, + "LABEL_729": 729, + "LABEL_73": 73, + "LABEL_730": 730, + "LABEL_731": 731, + "LABEL_732": 732, + "LABEL_733": 733, + "LABEL_734": 734, + "LABEL_735": 735, + "LABEL_736": 736, + "LABEL_737": 737, + "LABEL_738": 738, + "LABEL_739": 739, + "LABEL_74": 74, + "LABEL_740": 740, + "LABEL_741": 741, + "LABEL_742": 742, + "LABEL_743": 743, + "LABEL_744": 744, + "LABEL_745": 745, + "LABEL_746": 746, + "LABEL_747": 747, + "LABEL_748": 748, + "LABEL_749": 749, + "LABEL_75": 75, + "LABEL_750": 750, + "LABEL_751": 751, + "LABEL_752": 752, + "LABEL_753": 753, + "LABEL_754": 754, + "LABEL_755": 755, + "LABEL_756": 756, + "LABEL_757": 757, + "LABEL_758": 758, + "LABEL_759": 759, + "LABEL_76": 76, + "LABEL_760": 760, + 
"LABEL_761": 761, + "LABEL_762": 762, + "LABEL_763": 763, + "LABEL_764": 764, + "LABEL_765": 765, + "LABEL_766": 766, + "LABEL_767": 767, + "LABEL_768": 768, + "LABEL_769": 769, + "LABEL_77": 77, + "LABEL_770": 770, + "LABEL_771": 771, + "LABEL_772": 772, + "LABEL_773": 773, + "LABEL_774": 774, + "LABEL_775": 775, + "LABEL_776": 776, + "LABEL_777": 777, + "LABEL_778": 778, + "LABEL_779": 779, + "LABEL_78": 78, + "LABEL_780": 780, + "LABEL_781": 781, + "LABEL_782": 782, + "LABEL_783": 783, + "LABEL_784": 784, + "LABEL_785": 785, + "LABEL_786": 786, + "LABEL_787": 787, + "LABEL_788": 788, + "LABEL_789": 789, + "LABEL_79": 79, + "LABEL_790": 790, + "LABEL_791": 791, + "LABEL_792": 792, + "LABEL_793": 793, + "LABEL_794": 794, + "LABEL_795": 795, + "LABEL_796": 796, + "LABEL_797": 797, + "LABEL_798": 798, + "LABEL_799": 799, + "LABEL_8": 8, + "LABEL_80": 80, + "LABEL_800": 800, + "LABEL_801": 801, + "LABEL_802": 802, + "LABEL_803": 803, + "LABEL_804": 804, + "LABEL_805": 805, + "LABEL_806": 806, + "LABEL_807": 807, + "LABEL_808": 808, + "LABEL_809": 809, + "LABEL_81": 81, + "LABEL_810": 810, + "LABEL_811": 811, + "LABEL_812": 812, + "LABEL_813": 813, + "LABEL_814": 814, + "LABEL_815": 815, + "LABEL_816": 816, + "LABEL_817": 817, + "LABEL_818": 818, + "LABEL_819": 819, + "LABEL_82": 82, + "LABEL_820": 820, + "LABEL_821": 821, + "LABEL_822": 822, + "LABEL_823": 823, + "LABEL_824": 824, + "LABEL_825": 825, + "LABEL_826": 826, + "LABEL_827": 827, + "LABEL_828": 828, + "LABEL_829": 829, + "LABEL_83": 83, + "LABEL_830": 830, + "LABEL_831": 831, + "LABEL_832": 832, + "LABEL_833": 833, + "LABEL_834": 834, + "LABEL_835": 835, + "LABEL_836": 836, + "LABEL_837": 837, + "LABEL_838": 838, + "LABEL_839": 839, + "LABEL_84": 84, + "LABEL_840": 840, + "LABEL_841": 841, + "LABEL_842": 842, + "LABEL_843": 843, + "LABEL_844": 844, + "LABEL_845": 845, + "LABEL_846": 846, + "LABEL_847": 847, + "LABEL_848": 848, + "LABEL_849": 849, + "LABEL_85": 85, + "LABEL_850": 850, + "LABEL_851": 851, + 
"LABEL_852": 852, + "LABEL_853": 853, + "LABEL_854": 854, + "LABEL_855": 855, + "LABEL_856": 856, + "LABEL_857": 857, + "LABEL_858": 858, + "LABEL_859": 859, + "LABEL_86": 86, + "LABEL_860": 860, + "LABEL_861": 861, + "LABEL_862": 862, + "LABEL_863": 863, + "LABEL_864": 864, + "LABEL_865": 865, + "LABEL_866": 866, + "LABEL_867": 867, + "LABEL_868": 868, + "LABEL_869": 869, + "LABEL_87": 87, + "LABEL_870": 870, + "LABEL_871": 871, + "LABEL_872": 872, + "LABEL_873": 873, + "LABEL_874": 874, + "LABEL_875": 875, + "LABEL_876": 876, + "LABEL_877": 877, + "LABEL_878": 878, + "LABEL_879": 879, + "LABEL_88": 88, + "LABEL_880": 880, + "LABEL_881": 881, + "LABEL_882": 882, + "LABEL_883": 883, + "LABEL_884": 884, + "LABEL_885": 885, + "LABEL_886": 886, + "LABEL_887": 887, + "LABEL_888": 888, + "LABEL_889": 889, + "LABEL_89": 89, + "LABEL_890": 890, + "LABEL_891": 891, + "LABEL_892": 892, + "LABEL_893": 893, + "LABEL_894": 894, + "LABEL_895": 895, + "LABEL_896": 896, + "LABEL_897": 897, + "LABEL_898": 898, + "LABEL_899": 899, + "LABEL_9": 9, + "LABEL_90": 90, + "LABEL_900": 900, + "LABEL_901": 901, + "LABEL_902": 902, + "LABEL_903": 903, + "LABEL_904": 904, + "LABEL_905": 905, + "LABEL_906": 906, + "LABEL_907": 907, + "LABEL_908": 908, + "LABEL_909": 909, + "LABEL_91": 91, + "LABEL_910": 910, + "LABEL_911": 911, + "LABEL_912": 912, + "LABEL_913": 913, + "LABEL_914": 914, + "LABEL_915": 915, + "LABEL_916": 916, + "LABEL_917": 917, + "LABEL_918": 918, + "LABEL_919": 919, + "LABEL_92": 92, + "LABEL_920": 920, + "LABEL_921": 921, + "LABEL_922": 922, + "LABEL_923": 923, + "LABEL_924": 924, + "LABEL_925": 925, + "LABEL_926": 926, + "LABEL_927": 927, + "LABEL_928": 928, + "LABEL_929": 929, + "LABEL_93": 93, + "LABEL_930": 930, + "LABEL_931": 931, + "LABEL_932": 932, + "LABEL_933": 933, + "LABEL_934": 934, + "LABEL_935": 935, + "LABEL_936": 936, + "LABEL_937": 937, + "LABEL_938": 938, + "LABEL_939": 939, + "LABEL_94": 94, + "LABEL_940": 940, + "LABEL_941": 941, + "LABEL_942": 942, + 
"LABEL_943": 943, + "LABEL_944": 944, + "LABEL_945": 945, + "LABEL_946": 946, + "LABEL_947": 947, + "LABEL_948": 948, + "LABEL_949": 949, + "LABEL_95": 95, + "LABEL_950": 950, + "LABEL_951": 951, + "LABEL_952": 952, + "LABEL_953": 953, + "LABEL_954": 954, + "LABEL_955": 955, + "LABEL_956": 956, + "LABEL_957": 957, + "LABEL_958": 958, + "LABEL_959": 959, + "LABEL_96": 96, + "LABEL_960": 960, + "LABEL_961": 961, + "LABEL_962": 962, + "LABEL_963": 963, + "LABEL_964": 964, + "LABEL_965": 965, + "LABEL_966": 966, + "LABEL_967": 967, + "LABEL_968": 968, + "LABEL_969": 969, + "LABEL_97": 97, + "LABEL_970": 970, + "LABEL_971": 971, + "LABEL_972": 972, + "LABEL_973": 973, + "LABEL_974": 974, + "LABEL_975": 975, + "LABEL_976": 976, + "LABEL_977": 977, + "LABEL_978": 978, + "LABEL_979": 979, + "LABEL_98": 98, + "LABEL_980": 980, + "LABEL_981": 981, + "LABEL_982": 982, + "LABEL_983": 983, + "LABEL_984": 984, + "LABEL_985": 985, + "LABEL_986": 986, + "LABEL_987": 987, + "LABEL_988": 988, + "LABEL_989": 989, + "LABEL_99": 99, + "LABEL_990": 990, + "LABEL_991": 991, + "LABEL_992": 992, + "LABEL_993": 993, + "LABEL_994": 994, + "LABEL_995": 995, + "LABEL_996": 996, + "LABEL_997": 997, + "LABEL_998": 998, + "LABEL_999": 999 + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.05, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "max_bucket_distance": 800, + "model_type": "wavlm", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_buckets": 320, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, 
+ "num_conv_pos_embeddings": 128, + "num_ctc_classes": 80, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "replace_prob": 0.5, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "transformers_version": "4.33.2", + "use_weighted_layer_sum": true, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/wavlm-base-plus-sv/onnx/model.onnx b/wavlm-base-plus-sv/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..6b397a8f331419a87e0d56553ed1fed3ca07f75b --- /dev/null +++ b/wavlm-base-plus-sv/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a38bdd854a11db171357cb997156511697d2f2c621d1262c82ba91b873d08b +size 402471430 diff --git a/wavlm-base-plus-sv/onnx/model_quantized.onnx b/wavlm-base-plus-sv/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..31329769a9f75eb26c7591795c686f3d14b7e4ed --- /dev/null +++ b/wavlm-base-plus-sv/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:576bf6017796bdd179824d801b3a355a1dda2451559c6a398562175e33589f68 +size 101683453 diff --git a/wavlm-base-plus-sv/preprocessor_config.json b/wavlm-base-plus-sv/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10f6def8c83d70a2b087a567dcf523b75152a80b --- /dev/null +++ b/wavlm-base-plus-sv/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wavlm-base-plus-sv/quantize_config.json 
b/wavlm-base-plus-sv/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7ae5f3f16a7f35e8e366ca22ee50f42de325cfad --- /dev/null +++ b/wavlm-base-plus-sv/quantize_config.json @@ -0,0 +1,48 @@ +{ + "per_channel": false, + "reduce_range": false, + "per_model_config": { + "model": { + "op_types": [ + "Sigmoid", + "Transpose", + "ConstantOfShape", + "Constant", + "Cast", + "Greater", + "Less", + "MatMul", + "Relu", + "Gather", + "Erf", + "Shape", + "Pad", + "Div", + "Mul", + "ReduceSum", + "Conv", + "ReduceMean", + "Reshape", + "Gemm", + "Range", + "Sqrt", + "Min", + "InstanceNormalization", + "Abs", + "Where", + "Sub", + "Softmax", + "Slice", + "ReduceProd", + "Expand", + "Tile", + "Unsqueeze", + "Add", + "Log", + "Pow", + "Concat" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/wavlm-base-plus-sv/source.txt b/wavlm-base-plus-sv/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..cf9ea98ab0440079052144b986a9c034704c705d --- /dev/null +++ b/wavlm-base-plus-sv/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/wavlm-base-plus-sv \ No newline at end of file diff --git a/wavlm-base-plus/.gitattributes b/wavlm-base-plus/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/wavlm-base-plus/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs 
-text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/wavlm-base-plus/README.md b/wavlm-base-plus/README.md new file mode 100644 index 0000000000000000000000000000000000000000..eb78e012910ffc024d1be50d3396a800725f4e66 --- /dev/null +++ b/wavlm-base-plus/README.md @@ -0,0 +1,8 @@ +--- +base_model: microsoft/wavlm-base-plus +library_name: transformers.js +--- + +https://huggingface.co/microsoft/wavlm-base-plus with ONNX weights to be compatible with Transformers.js. + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/wavlm-base-plus/config.json b/wavlm-base-plus/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a99d6f8928850b0ccd9503a6c4d36a9b10973219 --- /dev/null +++ b/wavlm-base-plus/config.json @@ -0,0 +1,121 @@ +{ + "_name_or_path": "microsoft/wavlm-base-plus", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMModel" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.05, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "max_bucket_distance": 800, + "model_type": "wavlm", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_buckets": 320, + 
"num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_ctc_classes": 80, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "replace_prob": 0.5, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "transformers_version": "4.33.2", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/wavlm-base-plus/onnx/model.onnx b/wavlm-base-plus/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3dce181442e418d659a02f71e479545a555f0e23 --- /dev/null +++ b/wavlm-base-plus/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07789ad4d9e01cba75fce7fd923e7a5e9a0aaf0f43df25ba6ee6d3d6d7adaa1c +size 377935358 diff --git a/wavlm-base-plus/onnx/model_fp16.onnx b/wavlm-base-plus/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..82707001d8593db06c5dec3e80ff8e9b6f20e752 --- /dev/null +++ b/wavlm-base-plus/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8595e889bc108a87544b1c815c2c033b11e7824d9d18732be8e21d16fef0a351 +size 189308046 diff --git a/wavlm-base-plus/onnx/model_quantized.onnx b/wavlm-base-plus/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..40275a530f6faab5c885ea2a6d2b75bcf9318fd5 --- /dev/null +++ b/wavlm-base-plus/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9629e437514e16880eacc868dde9db6f319fa0864fdbb95de61fd39af3dbfe89 +size 95421869 diff --git a/wavlm-base-plus/preprocessor_config.json b/wavlm-base-plus/preprocessor_config.json new 
file mode 100644 index 0000000000000000000000000000000000000000..10f6def8c83d70a2b087a567dcf523b75152a80b --- /dev/null +++ b/wavlm-base-plus/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wavlm-base-plus/quantize_config.json b/wavlm-base-plus/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2590c6453fac918462f37d8c824f21935d4efa94 --- /dev/null +++ b/wavlm-base-plus/quantize_config.json @@ -0,0 +1,45 @@ +{ + "per_channel": false, + "reduce_range": false, + "per_model_config": { + "model": { + "op_types": [ + "Transpose", + "Pow", + "Gather", + "Shape", + "Abs", + "Min", + "MatMul", + "Tile", + "Softmax", + "ReduceMean", + "Log", + "Concat", + "ReduceSum", + "Div", + "Cast", + "Reshape", + "Sqrt", + "InstanceNormalization", + "Sigmoid", + "Unsqueeze", + "Range", + "Gemm", + "Erf", + "Greater", + "Where", + "Sub", + "Conv", + "Less", + "Slice", + "Expand", + "ConstantOfShape", + "Constant", + "Mul", + "Add" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/wavlm-base-plus/source.txt b/wavlm-base-plus/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e906a907cd2897083311bf060baf12a3f1c6f32 --- /dev/null +++ b/wavlm-base-plus/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/wavlm-base-plus \ No newline at end of file diff --git a/wavlm-base-sv/.gitattributes b/wavlm-base-sv/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/wavlm-base-sv/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt 
filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/wavlm-base-sv/README.md b/wavlm-base-sv/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3dc9c7bd6c9b2f93e8d3efa7e2210ab0ba732a86 --- /dev/null +++ b/wavlm-base-sv/README.md @@ -0,0 +1,47 @@ +--- +base_model: microsoft/wavlm-base-sv +library_name: transformers.js +--- + +https://huggingface.co/microsoft/wavlm-base-sv with ONNX weights to be compatible with Transformers.js. 
+ +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using: +```bash +npm i @xenova/transformers +``` + +**Example:** Speaker verification w/ `Xenova/wavlm-base-sv`. + +```js +import { AutoProcessor, AutoModel, read_audio, cos_sim } from '@xenova/transformers'; + +// Load processor and model +const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base-sv'); +const model = await AutoModel.from_pretrained('Xenova/wavlm-base-sv'); + +// Helper function to compute speaker embedding from audio URL +async function compute_embedding(url) { + const audio = await read_audio(url, 16000); + const inputs = await processor(audio); + const { embeddings } = await model(inputs); + return embeddings.data; +} + +// Generate speaker embeddings +const BASE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/sv_speaker'; +const speaker_1_1 = await compute_embedding(`${BASE_URL}-1_1.wav`); +const speaker_1_2 = await compute_embedding(`${BASE_URL}-1_2.wav`); +const speaker_2_1 = await compute_embedding(`${BASE_URL}-2_1.wav`); +const speaker_2_2 = await compute_embedding(`${BASE_URL}-2_2.wav`); + +// Compute similarity scores +console.log(cos_sim(speaker_1_1, speaker_1_2)); // 0.9339586437268694 (Both are speaker 1) +console.log(cos_sim(speaker_1_2, speaker_2_1)); // 0.7096775310911547 (Different speakers) +console.log(cos_sim(speaker_2_1, speaker_2_2)); // 0.9603887462630838 (Both are speaker 2) +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/wavlm-base-sv/config.json b/wavlm-base-sv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e7c35374a33dd5b5cca1cf011f9d9743e632b49 --- /dev/null +++ b/wavlm-base-sv/config.json @@ -0,0 +1,2546 @@ +{ + "_name_or_path": "microsoft/wavlm-base-sv", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMForXVector" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2", + "3": "LABEL_3", + "4": "LABEL_4", + "5": "LABEL_5", + "6": "LABEL_6", + "7": "LABEL_7", + "8": "LABEL_8", + "9": "LABEL_9", + "10": "LABEL_10", + "11": "LABEL_11", + "12": "LABEL_12", + "13": "LABEL_13", + "14": "LABEL_14", + "15": "LABEL_15", + "16": "LABEL_16", + "17": "LABEL_17", + "18": "LABEL_18", + "19": "LABEL_19", + "20": "LABEL_20", + "21": "LABEL_21", + "22": "LABEL_22", + "23": "LABEL_23", + "24": "LABEL_24", + "25": "LABEL_25", + "26": "LABEL_26", + "27": "LABEL_27", + "28": "LABEL_28", + "29": "LABEL_29", + "30": "LABEL_30", + "31": "LABEL_31", + "32": "LABEL_32", + "33": "LABEL_33", + "34": "LABEL_34", + "35": "LABEL_35", + "36": "LABEL_36", + 
"37": "LABEL_37", + "38": "LABEL_38", + "39": "LABEL_39", + "40": "LABEL_40", + "41": "LABEL_41", + "42": "LABEL_42", + "43": "LABEL_43", + "44": "LABEL_44", + "45": "LABEL_45", + "46": "LABEL_46", + "47": "LABEL_47", + "48": "LABEL_48", + "49": "LABEL_49", + "50": "LABEL_50", + "51": "LABEL_51", + "52": "LABEL_52", + "53": "LABEL_53", + "54": "LABEL_54", + "55": "LABEL_55", + "56": "LABEL_56", + "57": "LABEL_57", + "58": "LABEL_58", + "59": "LABEL_59", + "60": "LABEL_60", + "61": "LABEL_61", + "62": "LABEL_62", + "63": "LABEL_63", + "64": "LABEL_64", + "65": "LABEL_65", + "66": "LABEL_66", + "67": "LABEL_67", + "68": "LABEL_68", + "69": "LABEL_69", + "70": "LABEL_70", + "71": "LABEL_71", + "72": "LABEL_72", + "73": "LABEL_73", + "74": "LABEL_74", + "75": "LABEL_75", + "76": "LABEL_76", + "77": "LABEL_77", + "78": "LABEL_78", + "79": "LABEL_79", + "80": "LABEL_80", + "81": "LABEL_81", + "82": "LABEL_82", + "83": "LABEL_83", + "84": "LABEL_84", + "85": "LABEL_85", + "86": "LABEL_86", + "87": "LABEL_87", + "88": "LABEL_88", + "89": "LABEL_89", + "90": "LABEL_90", + "91": "LABEL_91", + "92": "LABEL_92", + "93": "LABEL_93", + "94": "LABEL_94", + "95": "LABEL_95", + "96": "LABEL_96", + "97": "LABEL_97", + "98": "LABEL_98", + "99": "LABEL_99", + "100": "LABEL_100", + "101": "LABEL_101", + "102": "LABEL_102", + "103": "LABEL_103", + "104": "LABEL_104", + "105": "LABEL_105", + "106": "LABEL_106", + "107": "LABEL_107", + "108": "LABEL_108", + "109": "LABEL_109", + "110": "LABEL_110", + "111": "LABEL_111", + "112": "LABEL_112", + "113": "LABEL_113", + "114": "LABEL_114", + "115": "LABEL_115", + "116": "LABEL_116", + "117": "LABEL_117", + "118": "LABEL_118", + "119": "LABEL_119", + "120": "LABEL_120", + "121": "LABEL_121", + "122": "LABEL_122", + "123": "LABEL_123", + "124": "LABEL_124", + "125": "LABEL_125", + "126": "LABEL_126", + "127": "LABEL_127", + "128": "LABEL_128", + "129": "LABEL_129", + "130": "LABEL_130", + "131": "LABEL_131", + "132": "LABEL_132", + "133": 
"LABEL_133", + "134": "LABEL_134", + "135": "LABEL_135", + "136": "LABEL_136", + "137": "LABEL_137", + "138": "LABEL_138", + "139": "LABEL_139", + "140": "LABEL_140", + "141": "LABEL_141", + "142": "LABEL_142", + "143": "LABEL_143", + "144": "LABEL_144", + "145": "LABEL_145", + "146": "LABEL_146", + "147": "LABEL_147", + "148": "LABEL_148", + "149": "LABEL_149", + "150": "LABEL_150", + "151": "LABEL_151", + "152": "LABEL_152", + "153": "LABEL_153", + "154": "LABEL_154", + "155": "LABEL_155", + "156": "LABEL_156", + "157": "LABEL_157", + "158": "LABEL_158", + "159": "LABEL_159", + "160": "LABEL_160", + "161": "LABEL_161", + "162": "LABEL_162", + "163": "LABEL_163", + "164": "LABEL_164", + "165": "LABEL_165", + "166": "LABEL_166", + "167": "LABEL_167", + "168": "LABEL_168", + "169": "LABEL_169", + "170": "LABEL_170", + "171": "LABEL_171", + "172": "LABEL_172", + "173": "LABEL_173", + "174": "LABEL_174", + "175": "LABEL_175", + "176": "LABEL_176", + "177": "LABEL_177", + "178": "LABEL_178", + "179": "LABEL_179", + "180": "LABEL_180", + "181": "LABEL_181", + "182": "LABEL_182", + "183": "LABEL_183", + "184": "LABEL_184", + "185": "LABEL_185", + "186": "LABEL_186", + "187": "LABEL_187", + "188": "LABEL_188", + "189": "LABEL_189", + "190": "LABEL_190", + "191": "LABEL_191", + "192": "LABEL_192", + "193": "LABEL_193", + "194": "LABEL_194", + "195": "LABEL_195", + "196": "LABEL_196", + "197": "LABEL_197", + "198": "LABEL_198", + "199": "LABEL_199", + "200": "LABEL_200", + "201": "LABEL_201", + "202": "LABEL_202", + "203": "LABEL_203", + "204": "LABEL_204", + "205": "LABEL_205", + "206": "LABEL_206", + "207": "LABEL_207", + "208": "LABEL_208", + "209": "LABEL_209", + "210": "LABEL_210", + "211": "LABEL_211", + "212": "LABEL_212", + "213": "LABEL_213", + "214": "LABEL_214", + "215": "LABEL_215", + "216": "LABEL_216", + "217": "LABEL_217", + "218": "LABEL_218", + "219": "LABEL_219", + "220": "LABEL_220", + "221": "LABEL_221", + "222": "LABEL_222", + "223": "LABEL_223", + 
"224": "LABEL_224", + "225": "LABEL_225", + "226": "LABEL_226", + "227": "LABEL_227", + "228": "LABEL_228", + "229": "LABEL_229", + "230": "LABEL_230", + "231": "LABEL_231", + "232": "LABEL_232", + "233": "LABEL_233", + "234": "LABEL_234", + "235": "LABEL_235", + "236": "LABEL_236", + "237": "LABEL_237", + "238": "LABEL_238", + "239": "LABEL_239", + "240": "LABEL_240", + "241": "LABEL_241", + "242": "LABEL_242", + "243": "LABEL_243", + "244": "LABEL_244", + "245": "LABEL_245", + "246": "LABEL_246", + "247": "LABEL_247", + "248": "LABEL_248", + "249": "LABEL_249", + "250": "LABEL_250", + "251": "LABEL_251", + "252": "LABEL_252", + "253": "LABEL_253", + "254": "LABEL_254", + "255": "LABEL_255", + "256": "LABEL_256", + "257": "LABEL_257", + "258": "LABEL_258", + "259": "LABEL_259", + "260": "LABEL_260", + "261": "LABEL_261", + "262": "LABEL_262", + "263": "LABEL_263", + "264": "LABEL_264", + "265": "LABEL_265", + "266": "LABEL_266", + "267": "LABEL_267", + "268": "LABEL_268", + "269": "LABEL_269", + "270": "LABEL_270", + "271": "LABEL_271", + "272": "LABEL_272", + "273": "LABEL_273", + "274": "LABEL_274", + "275": "LABEL_275", + "276": "LABEL_276", + "277": "LABEL_277", + "278": "LABEL_278", + "279": "LABEL_279", + "280": "LABEL_280", + "281": "LABEL_281", + "282": "LABEL_282", + "283": "LABEL_283", + "284": "LABEL_284", + "285": "LABEL_285", + "286": "LABEL_286", + "287": "LABEL_287", + "288": "LABEL_288", + "289": "LABEL_289", + "290": "LABEL_290", + "291": "LABEL_291", + "292": "LABEL_292", + "293": "LABEL_293", + "294": "LABEL_294", + "295": "LABEL_295", + "296": "LABEL_296", + "297": "LABEL_297", + "298": "LABEL_298", + "299": "LABEL_299", + "300": "LABEL_300", + "301": "LABEL_301", + "302": "LABEL_302", + "303": "LABEL_303", + "304": "LABEL_304", + "305": "LABEL_305", + "306": "LABEL_306", + "307": "LABEL_307", + "308": "LABEL_308", + "309": "LABEL_309", + "310": "LABEL_310", + "311": "LABEL_311", + "312": "LABEL_312", + "313": "LABEL_313", + "314": "LABEL_314", 
+ "315": "LABEL_315", + "316": "LABEL_316", + "317": "LABEL_317", + "318": "LABEL_318", + "319": "LABEL_319", + "320": "LABEL_320", + "321": "LABEL_321", + "322": "LABEL_322", + "323": "LABEL_323", + "324": "LABEL_324", + "325": "LABEL_325", + "326": "LABEL_326", + "327": "LABEL_327", + "328": "LABEL_328", + "329": "LABEL_329", + "330": "LABEL_330", + "331": "LABEL_331", + "332": "LABEL_332", + "333": "LABEL_333", + "334": "LABEL_334", + "335": "LABEL_335", + "336": "LABEL_336", + "337": "LABEL_337", + "338": "LABEL_338", + "339": "LABEL_339", + "340": "LABEL_340", + "341": "LABEL_341", + "342": "LABEL_342", + "343": "LABEL_343", + "344": "LABEL_344", + "345": "LABEL_345", + "346": "LABEL_346", + "347": "LABEL_347", + "348": "LABEL_348", + "349": "LABEL_349", + "350": "LABEL_350", + "351": "LABEL_351", + "352": "LABEL_352", + "353": "LABEL_353", + "354": "LABEL_354", + "355": "LABEL_355", + "356": "LABEL_356", + "357": "LABEL_357", + "358": "LABEL_358", + "359": "LABEL_359", + "360": "LABEL_360", + "361": "LABEL_361", + "362": "LABEL_362", + "363": "LABEL_363", + "364": "LABEL_364", + "365": "LABEL_365", + "366": "LABEL_366", + "367": "LABEL_367", + "368": "LABEL_368", + "369": "LABEL_369", + "370": "LABEL_370", + "371": "LABEL_371", + "372": "LABEL_372", + "373": "LABEL_373", + "374": "LABEL_374", + "375": "LABEL_375", + "376": "LABEL_376", + "377": "LABEL_377", + "378": "LABEL_378", + "379": "LABEL_379", + "380": "LABEL_380", + "381": "LABEL_381", + "382": "LABEL_382", + "383": "LABEL_383", + "384": "LABEL_384", + "385": "LABEL_385", + "386": "LABEL_386", + "387": "LABEL_387", + "388": "LABEL_388", + "389": "LABEL_389", + "390": "LABEL_390", + "391": "LABEL_391", + "392": "LABEL_392", + "393": "LABEL_393", + "394": "LABEL_394", + "395": "LABEL_395", + "396": "LABEL_396", + "397": "LABEL_397", + "398": "LABEL_398", + "399": "LABEL_399", + "400": "LABEL_400", + "401": "LABEL_401", + "402": "LABEL_402", + "403": "LABEL_403", + "404": "LABEL_404", + "405": 
"LABEL_405", + "406": "LABEL_406", + "407": "LABEL_407", + "408": "LABEL_408", + "409": "LABEL_409", + "410": "LABEL_410", + "411": "LABEL_411", + "412": "LABEL_412", + "413": "LABEL_413", + "414": "LABEL_414", + "415": "LABEL_415", + "416": "LABEL_416", + "417": "LABEL_417", + "418": "LABEL_418", + "419": "LABEL_419", + "420": "LABEL_420", + "421": "LABEL_421", + "422": "LABEL_422", + "423": "LABEL_423", + "424": "LABEL_424", + "425": "LABEL_425", + "426": "LABEL_426", + "427": "LABEL_427", + "428": "LABEL_428", + "429": "LABEL_429", + "430": "LABEL_430", + "431": "LABEL_431", + "432": "LABEL_432", + "433": "LABEL_433", + "434": "LABEL_434", + "435": "LABEL_435", + "436": "LABEL_436", + "437": "LABEL_437", + "438": "LABEL_438", + "439": "LABEL_439", + "440": "LABEL_440", + "441": "LABEL_441", + "442": "LABEL_442", + "443": "LABEL_443", + "444": "LABEL_444", + "445": "LABEL_445", + "446": "LABEL_446", + "447": "LABEL_447", + "448": "LABEL_448", + "449": "LABEL_449", + "450": "LABEL_450", + "451": "LABEL_451", + "452": "LABEL_452", + "453": "LABEL_453", + "454": "LABEL_454", + "455": "LABEL_455", + "456": "LABEL_456", + "457": "LABEL_457", + "458": "LABEL_458", + "459": "LABEL_459", + "460": "LABEL_460", + "461": "LABEL_461", + "462": "LABEL_462", + "463": "LABEL_463", + "464": "LABEL_464", + "465": "LABEL_465", + "466": "LABEL_466", + "467": "LABEL_467", + "468": "LABEL_468", + "469": "LABEL_469", + "470": "LABEL_470", + "471": "LABEL_471", + "472": "LABEL_472", + "473": "LABEL_473", + "474": "LABEL_474", + "475": "LABEL_475", + "476": "LABEL_476", + "477": "LABEL_477", + "478": "LABEL_478", + "479": "LABEL_479", + "480": "LABEL_480", + "481": "LABEL_481", + "482": "LABEL_482", + "483": "LABEL_483", + "484": "LABEL_484", + "485": "LABEL_485", + "486": "LABEL_486", + "487": "LABEL_487", + "488": "LABEL_488", + "489": "LABEL_489", + "490": "LABEL_490", + "491": "LABEL_491", + "492": "LABEL_492", + "493": "LABEL_493", + "494": "LABEL_494", + "495": "LABEL_495", + 
"496": "LABEL_496", + "497": "LABEL_497", + "498": "LABEL_498", + "499": "LABEL_499", + "500": "LABEL_500", + "501": "LABEL_501", + "502": "LABEL_502", + "503": "LABEL_503", + "504": "LABEL_504", + "505": "LABEL_505", + "506": "LABEL_506", + "507": "LABEL_507", + "508": "LABEL_508", + "509": "LABEL_509", + "510": "LABEL_510", + "511": "LABEL_511", + "512": "LABEL_512", + "513": "LABEL_513", + "514": "LABEL_514", + "515": "LABEL_515", + "516": "LABEL_516", + "517": "LABEL_517", + "518": "LABEL_518", + "519": "LABEL_519", + "520": "LABEL_520", + "521": "LABEL_521", + "522": "LABEL_522", + "523": "LABEL_523", + "524": "LABEL_524", + "525": "LABEL_525", + "526": "LABEL_526", + "527": "LABEL_527", + "528": "LABEL_528", + "529": "LABEL_529", + "530": "LABEL_530", + "531": "LABEL_531", + "532": "LABEL_532", + "533": "LABEL_533", + "534": "LABEL_534", + "535": "LABEL_535", + "536": "LABEL_536", + "537": "LABEL_537", + "538": "LABEL_538", + "539": "LABEL_539", + "540": "LABEL_540", + "541": "LABEL_541", + "542": "LABEL_542", + "543": "LABEL_543", + "544": "LABEL_544", + "545": "LABEL_545", + "546": "LABEL_546", + "547": "LABEL_547", + "548": "LABEL_548", + "549": "LABEL_549", + "550": "LABEL_550", + "551": "LABEL_551", + "552": "LABEL_552", + "553": "LABEL_553", + "554": "LABEL_554", + "555": "LABEL_555", + "556": "LABEL_556", + "557": "LABEL_557", + "558": "LABEL_558", + "559": "LABEL_559", + "560": "LABEL_560", + "561": "LABEL_561", + "562": "LABEL_562", + "563": "LABEL_563", + "564": "LABEL_564", + "565": "LABEL_565", + "566": "LABEL_566", + "567": "LABEL_567", + "568": "LABEL_568", + "569": "LABEL_569", + "570": "LABEL_570", + "571": "LABEL_571", + "572": "LABEL_572", + "573": "LABEL_573", + "574": "LABEL_574", + "575": "LABEL_575", + "576": "LABEL_576", + "577": "LABEL_577", + "578": "LABEL_578", + "579": "LABEL_579", + "580": "LABEL_580", + "581": "LABEL_581", + "582": "LABEL_582", + "583": "LABEL_583", + "584": "LABEL_584", + "585": "LABEL_585", + "586": "LABEL_586", 
+ "587": "LABEL_587", + "588": "LABEL_588", + "589": "LABEL_589", + "590": "LABEL_590", + "591": "LABEL_591", + "592": "LABEL_592", + "593": "LABEL_593", + "594": "LABEL_594", + "595": "LABEL_595", + "596": "LABEL_596", + "597": "LABEL_597", + "598": "LABEL_598", + "599": "LABEL_599", + "600": "LABEL_600", + "601": "LABEL_601", + "602": "LABEL_602", + "603": "LABEL_603", + "604": "LABEL_604", + "605": "LABEL_605", + "606": "LABEL_606", + "607": "LABEL_607", + "608": "LABEL_608", + "609": "LABEL_609", + "610": "LABEL_610", + "611": "LABEL_611", + "612": "LABEL_612", + "613": "LABEL_613", + "614": "LABEL_614", + "615": "LABEL_615", + "616": "LABEL_616", + "617": "LABEL_617", + "618": "LABEL_618", + "619": "LABEL_619", + "620": "LABEL_620", + "621": "LABEL_621", + "622": "LABEL_622", + "623": "LABEL_623", + "624": "LABEL_624", + "625": "LABEL_625", + "626": "LABEL_626", + "627": "LABEL_627", + "628": "LABEL_628", + "629": "LABEL_629", + "630": "LABEL_630", + "631": "LABEL_631", + "632": "LABEL_632", + "633": "LABEL_633", + "634": "LABEL_634", + "635": "LABEL_635", + "636": "LABEL_636", + "637": "LABEL_637", + "638": "LABEL_638", + "639": "LABEL_639", + "640": "LABEL_640", + "641": "LABEL_641", + "642": "LABEL_642", + "643": "LABEL_643", + "644": "LABEL_644", + "645": "LABEL_645", + "646": "LABEL_646", + "647": "LABEL_647", + "648": "LABEL_648", + "649": "LABEL_649", + "650": "LABEL_650", + "651": "LABEL_651", + "652": "LABEL_652", + "653": "LABEL_653", + "654": "LABEL_654", + "655": "LABEL_655", + "656": "LABEL_656", + "657": "LABEL_657", + "658": "LABEL_658", + "659": "LABEL_659", + "660": "LABEL_660", + "661": "LABEL_661", + "662": "LABEL_662", + "663": "LABEL_663", + "664": "LABEL_664", + "665": "LABEL_665", + "666": "LABEL_666", + "667": "LABEL_667", + "668": "LABEL_668", + "669": "LABEL_669", + "670": "LABEL_670", + "671": "LABEL_671", + "672": "LABEL_672", + "673": "LABEL_673", + "674": "LABEL_674", + "675": "LABEL_675", + "676": "LABEL_676", + "677": 
"LABEL_677", + "678": "LABEL_678", + "679": "LABEL_679", + "680": "LABEL_680", + "681": "LABEL_681", + "682": "LABEL_682", + "683": "LABEL_683", + "684": "LABEL_684", + "685": "LABEL_685", + "686": "LABEL_686", + "687": "LABEL_687", + "688": "LABEL_688", + "689": "LABEL_689", + "690": "LABEL_690", + "691": "LABEL_691", + "692": "LABEL_692", + "693": "LABEL_693", + "694": "LABEL_694", + "695": "LABEL_695", + "696": "LABEL_696", + "697": "LABEL_697", + "698": "LABEL_698", + "699": "LABEL_699", + "700": "LABEL_700", + "701": "LABEL_701", + "702": "LABEL_702", + "703": "LABEL_703", + "704": "LABEL_704", + "705": "LABEL_705", + "706": "LABEL_706", + "707": "LABEL_707", + "708": "LABEL_708", + "709": "LABEL_709", + "710": "LABEL_710", + "711": "LABEL_711", + "712": "LABEL_712", + "713": "LABEL_713", + "714": "LABEL_714", + "715": "LABEL_715", + "716": "LABEL_716", + "717": "LABEL_717", + "718": "LABEL_718", + "719": "LABEL_719", + "720": "LABEL_720", + "721": "LABEL_721", + "722": "LABEL_722", + "723": "LABEL_723", + "724": "LABEL_724", + "725": "LABEL_725", + "726": "LABEL_726", + "727": "LABEL_727", + "728": "LABEL_728", + "729": "LABEL_729", + "730": "LABEL_730", + "731": "LABEL_731", + "732": "LABEL_732", + "733": "LABEL_733", + "734": "LABEL_734", + "735": "LABEL_735", + "736": "LABEL_736", + "737": "LABEL_737", + "738": "LABEL_738", + "739": "LABEL_739", + "740": "LABEL_740", + "741": "LABEL_741", + "742": "LABEL_742", + "743": "LABEL_743", + "744": "LABEL_744", + "745": "LABEL_745", + "746": "LABEL_746", + "747": "LABEL_747", + "748": "LABEL_748", + "749": "LABEL_749", + "750": "LABEL_750", + "751": "LABEL_751", + "752": "LABEL_752", + "753": "LABEL_753", + "754": "LABEL_754", + "755": "LABEL_755", + "756": "LABEL_756", + "757": "LABEL_757", + "758": "LABEL_758", + "759": "LABEL_759", + "760": "LABEL_760", + "761": "LABEL_761", + "762": "LABEL_762", + "763": "LABEL_763", + "764": "LABEL_764", + "765": "LABEL_765", + "766": "LABEL_766", + "767": "LABEL_767", + 
"768": "LABEL_768", + "769": "LABEL_769", + "770": "LABEL_770", + "771": "LABEL_771", + "772": "LABEL_772", + "773": "LABEL_773", + "774": "LABEL_774", + "775": "LABEL_775", + "776": "LABEL_776", + "777": "LABEL_777", + "778": "LABEL_778", + "779": "LABEL_779", + "780": "LABEL_780", + "781": "LABEL_781", + "782": "LABEL_782", + "783": "LABEL_783", + "784": "LABEL_784", + "785": "LABEL_785", + "786": "LABEL_786", + "787": "LABEL_787", + "788": "LABEL_788", + "789": "LABEL_789", + "790": "LABEL_790", + "791": "LABEL_791", + "792": "LABEL_792", + "793": "LABEL_793", + "794": "LABEL_794", + "795": "LABEL_795", + "796": "LABEL_796", + "797": "LABEL_797", + "798": "LABEL_798", + "799": "LABEL_799", + "800": "LABEL_800", + "801": "LABEL_801", + "802": "LABEL_802", + "803": "LABEL_803", + "804": "LABEL_804", + "805": "LABEL_805", + "806": "LABEL_806", + "807": "LABEL_807", + "808": "LABEL_808", + "809": "LABEL_809", + "810": "LABEL_810", + "811": "LABEL_811", + "812": "LABEL_812", + "813": "LABEL_813", + "814": "LABEL_814", + "815": "LABEL_815", + "816": "LABEL_816", + "817": "LABEL_817", + "818": "LABEL_818", + "819": "LABEL_819", + "820": "LABEL_820", + "821": "LABEL_821", + "822": "LABEL_822", + "823": "LABEL_823", + "824": "LABEL_824", + "825": "LABEL_825", + "826": "LABEL_826", + "827": "LABEL_827", + "828": "LABEL_828", + "829": "LABEL_829", + "830": "LABEL_830", + "831": "LABEL_831", + "832": "LABEL_832", + "833": "LABEL_833", + "834": "LABEL_834", + "835": "LABEL_835", + "836": "LABEL_836", + "837": "LABEL_837", + "838": "LABEL_838", + "839": "LABEL_839", + "840": "LABEL_840", + "841": "LABEL_841", + "842": "LABEL_842", + "843": "LABEL_843", + "844": "LABEL_844", + "845": "LABEL_845", + "846": "LABEL_846", + "847": "LABEL_847", + "848": "LABEL_848", + "849": "LABEL_849", + "850": "LABEL_850", + "851": "LABEL_851", + "852": "LABEL_852", + "853": "LABEL_853", + "854": "LABEL_854", + "855": "LABEL_855", + "856": "LABEL_856", + "857": "LABEL_857", + "858": "LABEL_858", 
+ "859": "LABEL_859", + "860": "LABEL_860", + "861": "LABEL_861", + "862": "LABEL_862", + "863": "LABEL_863", + "864": "LABEL_864", + "865": "LABEL_865", + "866": "LABEL_866", + "867": "LABEL_867", + "868": "LABEL_868", + "869": "LABEL_869", + "870": "LABEL_870", + "871": "LABEL_871", + "872": "LABEL_872", + "873": "LABEL_873", + "874": "LABEL_874", + "875": "LABEL_875", + "876": "LABEL_876", + "877": "LABEL_877", + "878": "LABEL_878", + "879": "LABEL_879", + "880": "LABEL_880", + "881": "LABEL_881", + "882": "LABEL_882", + "883": "LABEL_883", + "884": "LABEL_884", + "885": "LABEL_885", + "886": "LABEL_886", + "887": "LABEL_887", + "888": "LABEL_888", + "889": "LABEL_889", + "890": "LABEL_890", + "891": "LABEL_891", + "892": "LABEL_892", + "893": "LABEL_893", + "894": "LABEL_894", + "895": "LABEL_895", + "896": "LABEL_896", + "897": "LABEL_897", + "898": "LABEL_898", + "899": "LABEL_899", + "900": "LABEL_900", + "901": "LABEL_901", + "902": "LABEL_902", + "903": "LABEL_903", + "904": "LABEL_904", + "905": "LABEL_905", + "906": "LABEL_906", + "907": "LABEL_907", + "908": "LABEL_908", + "909": "LABEL_909", + "910": "LABEL_910", + "911": "LABEL_911", + "912": "LABEL_912", + "913": "LABEL_913", + "914": "LABEL_914", + "915": "LABEL_915", + "916": "LABEL_916", + "917": "LABEL_917", + "918": "LABEL_918", + "919": "LABEL_919", + "920": "LABEL_920", + "921": "LABEL_921", + "922": "LABEL_922", + "923": "LABEL_923", + "924": "LABEL_924", + "925": "LABEL_925", + "926": "LABEL_926", + "927": "LABEL_927", + "928": "LABEL_928", + "929": "LABEL_929", + "930": "LABEL_930", + "931": "LABEL_931", + "932": "LABEL_932", + "933": "LABEL_933", + "934": "LABEL_934", + "935": "LABEL_935", + "936": "LABEL_936", + "937": "LABEL_937", + "938": "LABEL_938", + "939": "LABEL_939", + "940": "LABEL_940", + "941": "LABEL_941", + "942": "LABEL_942", + "943": "LABEL_943", + "944": "LABEL_944", + "945": "LABEL_945", + "946": "LABEL_946", + "947": "LABEL_947", + "948": "LABEL_948", + "949": 
"LABEL_949", + "950": "LABEL_950", + "951": "LABEL_951", + "952": "LABEL_952", + "953": "LABEL_953", + "954": "LABEL_954", + "955": "LABEL_955", + "956": "LABEL_956", + "957": "LABEL_957", + "958": "LABEL_958", + "959": "LABEL_959", + "960": "LABEL_960", + "961": "LABEL_961", + "962": "LABEL_962", + "963": "LABEL_963", + "964": "LABEL_964", + "965": "LABEL_965", + "966": "LABEL_966", + "967": "LABEL_967", + "968": "LABEL_968", + "969": "LABEL_969", + "970": "LABEL_970", + "971": "LABEL_971", + "972": "LABEL_972", + "973": "LABEL_973", + "974": "LABEL_974", + "975": "LABEL_975", + "976": "LABEL_976", + "977": "LABEL_977", + "978": "LABEL_978", + "979": "LABEL_979", + "980": "LABEL_980", + "981": "LABEL_981", + "982": "LABEL_982", + "983": "LABEL_983", + "984": "LABEL_984", + "985": "LABEL_985", + "986": "LABEL_986", + "987": "LABEL_987", + "988": "LABEL_988", + "989": "LABEL_989", + "990": "LABEL_990", + "991": "LABEL_991", + "992": "LABEL_992", + "993": "LABEL_993", + "994": "LABEL_994", + "995": "LABEL_995", + "996": "LABEL_996", + "997": "LABEL_997", + "998": "LABEL_998", + "999": "LABEL_999", + "1000": "LABEL_1000", + "1001": "LABEL_1001", + "1002": "LABEL_1002", + "1003": "LABEL_1003", + "1004": "LABEL_1004", + "1005": "LABEL_1005", + "1006": "LABEL_1006", + "1007": "LABEL_1007", + "1008": "LABEL_1008", + "1009": "LABEL_1009", + "1010": "LABEL_1010", + "1011": "LABEL_1011", + "1012": "LABEL_1012", + "1013": "LABEL_1013", + "1014": "LABEL_1014", + "1015": "LABEL_1015", + "1016": "LABEL_1016", + "1017": "LABEL_1017", + "1018": "LABEL_1018", + "1019": "LABEL_1019", + "1020": "LABEL_1020", + "1021": "LABEL_1021", + "1022": "LABEL_1022", + "1023": "LABEL_1023", + "1024": "LABEL_1024", + "1025": "LABEL_1025", + "1026": "LABEL_1026", + "1027": "LABEL_1027", + "1028": "LABEL_1028", + "1029": "LABEL_1029", + "1030": "LABEL_1030", + "1031": "LABEL_1031", + "1032": "LABEL_1032", + "1033": "LABEL_1033", + "1034": "LABEL_1034", + "1035": "LABEL_1035", + "1036": 
"LABEL_1036", + "1037": "LABEL_1037", + "1038": "LABEL_1038", + "1039": "LABEL_1039", + "1040": "LABEL_1040", + "1041": "LABEL_1041", + "1042": "LABEL_1042", + "1043": "LABEL_1043", + "1044": "LABEL_1044", + "1045": "LABEL_1045", + "1046": "LABEL_1046", + "1047": "LABEL_1047", + "1048": "LABEL_1048", + "1049": "LABEL_1049", + "1050": "LABEL_1050", + "1051": "LABEL_1051", + "1052": "LABEL_1052", + "1053": "LABEL_1053", + "1054": "LABEL_1054", + "1055": "LABEL_1055", + "1056": "LABEL_1056", + "1057": "LABEL_1057", + "1058": "LABEL_1058", + "1059": "LABEL_1059", + "1060": "LABEL_1060", + "1061": "LABEL_1061", + "1062": "LABEL_1062", + "1063": "LABEL_1063", + "1064": "LABEL_1064", + "1065": "LABEL_1065", + "1066": "LABEL_1066", + "1067": "LABEL_1067", + "1068": "LABEL_1068", + "1069": "LABEL_1069", + "1070": "LABEL_1070", + "1071": "LABEL_1071", + "1072": "LABEL_1072", + "1073": "LABEL_1073", + "1074": "LABEL_1074", + "1075": "LABEL_1075", + "1076": "LABEL_1076", + "1077": "LABEL_1077", + "1078": "LABEL_1078", + "1079": "LABEL_1079", + "1080": "LABEL_1080", + "1081": "LABEL_1081", + "1082": "LABEL_1082", + "1083": "LABEL_1083", + "1084": "LABEL_1084", + "1085": "LABEL_1085", + "1086": "LABEL_1086", + "1087": "LABEL_1087", + "1088": "LABEL_1088", + "1089": "LABEL_1089", + "1090": "LABEL_1090", + "1091": "LABEL_1091", + "1092": "LABEL_1092", + "1093": "LABEL_1093", + "1094": "LABEL_1094", + "1095": "LABEL_1095", + "1096": "LABEL_1096", + "1097": "LABEL_1097", + "1098": "LABEL_1098", + "1099": "LABEL_1099", + "1100": "LABEL_1100", + "1101": "LABEL_1101", + "1102": "LABEL_1102", + "1103": "LABEL_1103", + "1104": "LABEL_1104", + "1105": "LABEL_1105", + "1106": "LABEL_1106", + "1107": "LABEL_1107", + "1108": "LABEL_1108", + "1109": "LABEL_1109", + "1110": "LABEL_1110", + "1111": "LABEL_1111", + "1112": "LABEL_1112", + "1113": "LABEL_1113", + "1114": "LABEL_1114", + "1115": "LABEL_1115", + "1116": "LABEL_1116", + "1117": "LABEL_1117", + "1118": "LABEL_1118", + "1119": 
"LABEL_1119", + "1120": "LABEL_1120", + "1121": "LABEL_1121", + "1122": "LABEL_1122", + "1123": "LABEL_1123", + "1124": "LABEL_1124", + "1125": "LABEL_1125", + "1126": "LABEL_1126", + "1127": "LABEL_1127", + "1128": "LABEL_1128", + "1129": "LABEL_1129", + "1130": "LABEL_1130", + "1131": "LABEL_1131", + "1132": "LABEL_1132", + "1133": "LABEL_1133", + "1134": "LABEL_1134", + "1135": "LABEL_1135", + "1136": "LABEL_1136", + "1137": "LABEL_1137", + "1138": "LABEL_1138", + "1139": "LABEL_1139", + "1140": "LABEL_1140", + "1141": "LABEL_1141", + "1142": "LABEL_1142", + "1143": "LABEL_1143", + "1144": "LABEL_1144", + "1145": "LABEL_1145", + "1146": "LABEL_1146", + "1147": "LABEL_1147", + "1148": "LABEL_1148", + "1149": "LABEL_1149", + "1150": "LABEL_1150", + "1151": "LABEL_1151", + "1152": "LABEL_1152", + "1153": "LABEL_1153", + "1154": "LABEL_1154", + "1155": "LABEL_1155", + "1156": "LABEL_1156", + "1157": "LABEL_1157", + "1158": "LABEL_1158", + "1159": "LABEL_1159", + "1160": "LABEL_1160", + "1161": "LABEL_1161", + "1162": "LABEL_1162", + "1163": "LABEL_1163", + "1164": "LABEL_1164", + "1165": "LABEL_1165", + "1166": "LABEL_1166", + "1167": "LABEL_1167", + "1168": "LABEL_1168", + "1169": "LABEL_1169", + "1170": "LABEL_1170", + "1171": "LABEL_1171", + "1172": "LABEL_1172", + "1173": "LABEL_1173", + "1174": "LABEL_1174", + "1175": "LABEL_1175", + "1176": "LABEL_1176", + "1177": "LABEL_1177", + "1178": "LABEL_1178", + "1179": "LABEL_1179", + "1180": "LABEL_1180", + "1181": "LABEL_1181", + "1182": "LABEL_1182", + "1183": "LABEL_1183", + "1184": "LABEL_1184", + "1185": "LABEL_1185", + "1186": "LABEL_1186", + "1187": "LABEL_1187", + "1188": "LABEL_1188", + "1189": "LABEL_1189", + "1190": "LABEL_1190", + "1191": "LABEL_1191", + "1192": "LABEL_1192", + "1193": "LABEL_1193", + "1194": "LABEL_1194", + "1195": "LABEL_1195", + "1196": "LABEL_1196", + "1197": "LABEL_1197", + "1198": "LABEL_1198", + "1199": "LABEL_1199", + "1200": "LABEL_1200", + "1201": "LABEL_1201", + "1202": 
"LABEL_1202", + "1203": "LABEL_1203", + "1204": "LABEL_1204", + "1205": "LABEL_1205", + "1206": "LABEL_1206", + "1207": "LABEL_1207", + "1208": "LABEL_1208", + "1209": "LABEL_1209", + "1210": "LABEL_1210" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_10": 10, + "LABEL_100": 100, + "LABEL_1000": 1000, + "LABEL_1001": 1001, + "LABEL_1002": 1002, + "LABEL_1003": 1003, + "LABEL_1004": 1004, + "LABEL_1005": 1005, + "LABEL_1006": 1006, + "LABEL_1007": 1007, + "LABEL_1008": 1008, + "LABEL_1009": 1009, + "LABEL_101": 101, + "LABEL_1010": 1010, + "LABEL_1011": 1011, + "LABEL_1012": 1012, + "LABEL_1013": 1013, + "LABEL_1014": 1014, + "LABEL_1015": 1015, + "LABEL_1016": 1016, + "LABEL_1017": 1017, + "LABEL_1018": 1018, + "LABEL_1019": 1019, + "LABEL_102": 102, + "LABEL_1020": 1020, + "LABEL_1021": 1021, + "LABEL_1022": 1022, + "LABEL_1023": 1023, + "LABEL_1024": 1024, + "LABEL_1025": 1025, + "LABEL_1026": 1026, + "LABEL_1027": 1027, + "LABEL_1028": 1028, + "LABEL_1029": 1029, + "LABEL_103": 103, + "LABEL_1030": 1030, + "LABEL_1031": 1031, + "LABEL_1032": 1032, + "LABEL_1033": 1033, + "LABEL_1034": 1034, + "LABEL_1035": 1035, + "LABEL_1036": 1036, + "LABEL_1037": 1037, + "LABEL_1038": 1038, + "LABEL_1039": 1039, + "LABEL_104": 104, + "LABEL_1040": 1040, + "LABEL_1041": 1041, + "LABEL_1042": 1042, + "LABEL_1043": 1043, + "LABEL_1044": 1044, + "LABEL_1045": 1045, + "LABEL_1046": 1046, + "LABEL_1047": 1047, + "LABEL_1048": 1048, + "LABEL_1049": 1049, + "LABEL_105": 105, + "LABEL_1050": 1050, + "LABEL_1051": 1051, + "LABEL_1052": 1052, + "LABEL_1053": 1053, + "LABEL_1054": 1054, + "LABEL_1055": 1055, + "LABEL_1056": 1056, + "LABEL_1057": 1057, + "LABEL_1058": 1058, + "LABEL_1059": 1059, + "LABEL_106": 106, + "LABEL_1060": 1060, + "LABEL_1061": 1061, + "LABEL_1062": 1062, + "LABEL_1063": 1063, + "LABEL_1064": 1064, + "LABEL_1065": 1065, + "LABEL_1066": 1066, + "LABEL_1067": 1067, + "LABEL_1068": 1068, + 
"LABEL_1069": 1069, + "LABEL_107": 107, + "LABEL_1070": 1070, + "LABEL_1071": 1071, + "LABEL_1072": 1072, + "LABEL_1073": 1073, + "LABEL_1074": 1074, + "LABEL_1075": 1075, + "LABEL_1076": 1076, + "LABEL_1077": 1077, + "LABEL_1078": 1078, + "LABEL_1079": 1079, + "LABEL_108": 108, + "LABEL_1080": 1080, + "LABEL_1081": 1081, + "LABEL_1082": 1082, + "LABEL_1083": 1083, + "LABEL_1084": 1084, + "LABEL_1085": 1085, + "LABEL_1086": 1086, + "LABEL_1087": 1087, + "LABEL_1088": 1088, + "LABEL_1089": 1089, + "LABEL_109": 109, + "LABEL_1090": 1090, + "LABEL_1091": 1091, + "LABEL_1092": 1092, + "LABEL_1093": 1093, + "LABEL_1094": 1094, + "LABEL_1095": 1095, + "LABEL_1096": 1096, + "LABEL_1097": 1097, + "LABEL_1098": 1098, + "LABEL_1099": 1099, + "LABEL_11": 11, + "LABEL_110": 110, + "LABEL_1100": 1100, + "LABEL_1101": 1101, + "LABEL_1102": 1102, + "LABEL_1103": 1103, + "LABEL_1104": 1104, + "LABEL_1105": 1105, + "LABEL_1106": 1106, + "LABEL_1107": 1107, + "LABEL_1108": 1108, + "LABEL_1109": 1109, + "LABEL_111": 111, + "LABEL_1110": 1110, + "LABEL_1111": 1111, + "LABEL_1112": 1112, + "LABEL_1113": 1113, + "LABEL_1114": 1114, + "LABEL_1115": 1115, + "LABEL_1116": 1116, + "LABEL_1117": 1117, + "LABEL_1118": 1118, + "LABEL_1119": 1119, + "LABEL_112": 112, + "LABEL_1120": 1120, + "LABEL_1121": 1121, + "LABEL_1122": 1122, + "LABEL_1123": 1123, + "LABEL_1124": 1124, + "LABEL_1125": 1125, + "LABEL_1126": 1126, + "LABEL_1127": 1127, + "LABEL_1128": 1128, + "LABEL_1129": 1129, + "LABEL_113": 113, + "LABEL_1130": 1130, + "LABEL_1131": 1131, + "LABEL_1132": 1132, + "LABEL_1133": 1133, + "LABEL_1134": 1134, + "LABEL_1135": 1135, + "LABEL_1136": 1136, + "LABEL_1137": 1137, + "LABEL_1138": 1138, + "LABEL_1139": 1139, + "LABEL_114": 114, + "LABEL_1140": 1140, + "LABEL_1141": 1141, + "LABEL_1142": 1142, + "LABEL_1143": 1143, + "LABEL_1144": 1144, + "LABEL_1145": 1145, + "LABEL_1146": 1146, + "LABEL_1147": 1147, + "LABEL_1148": 1148, + "LABEL_1149": 1149, + "LABEL_115": 115, + "LABEL_1150": 1150, 
+ "LABEL_1151": 1151, + "LABEL_1152": 1152, + "LABEL_1153": 1153, + "LABEL_1154": 1154, + "LABEL_1155": 1155, + "LABEL_1156": 1156, + "LABEL_1157": 1157, + "LABEL_1158": 1158, + "LABEL_1159": 1159, + "LABEL_116": 116, + "LABEL_1160": 1160, + "LABEL_1161": 1161, + "LABEL_1162": 1162, + "LABEL_1163": 1163, + "LABEL_1164": 1164, + "LABEL_1165": 1165, + "LABEL_1166": 1166, + "LABEL_1167": 1167, + "LABEL_1168": 1168, + "LABEL_1169": 1169, + "LABEL_117": 117, + "LABEL_1170": 1170, + "LABEL_1171": 1171, + "LABEL_1172": 1172, + "LABEL_1173": 1173, + "LABEL_1174": 1174, + "LABEL_1175": 1175, + "LABEL_1176": 1176, + "LABEL_1177": 1177, + "LABEL_1178": 1178, + "LABEL_1179": 1179, + "LABEL_118": 118, + "LABEL_1180": 1180, + "LABEL_1181": 1181, + "LABEL_1182": 1182, + "LABEL_1183": 1183, + "LABEL_1184": 1184, + "LABEL_1185": 1185, + "LABEL_1186": 1186, + "LABEL_1187": 1187, + "LABEL_1188": 1188, + "LABEL_1189": 1189, + "LABEL_119": 119, + "LABEL_1190": 1190, + "LABEL_1191": 1191, + "LABEL_1192": 1192, + "LABEL_1193": 1193, + "LABEL_1194": 1194, + "LABEL_1195": 1195, + "LABEL_1196": 1196, + "LABEL_1197": 1197, + "LABEL_1198": 1198, + "LABEL_1199": 1199, + "LABEL_12": 12, + "LABEL_120": 120, + "LABEL_1200": 1200, + "LABEL_1201": 1201, + "LABEL_1202": 1202, + "LABEL_1203": 1203, + "LABEL_1204": 1204, + "LABEL_1205": 1205, + "LABEL_1206": 1206, + "LABEL_1207": 1207, + "LABEL_1208": 1208, + "LABEL_1209": 1209, + "LABEL_121": 121, + "LABEL_1210": 1210, + "LABEL_122": 122, + "LABEL_123": 123, + "LABEL_124": 124, + "LABEL_125": 125, + "LABEL_126": 126, + "LABEL_127": 127, + "LABEL_128": 128, + "LABEL_129": 129, + "LABEL_13": 13, + "LABEL_130": 130, + "LABEL_131": 131, + "LABEL_132": 132, + "LABEL_133": 133, + "LABEL_134": 134, + "LABEL_135": 135, + "LABEL_136": 136, + "LABEL_137": 137, + "LABEL_138": 138, + "LABEL_139": 139, + "LABEL_14": 14, + "LABEL_140": 140, + "LABEL_141": 141, + "LABEL_142": 142, + "LABEL_143": 143, + "LABEL_144": 144, + "LABEL_145": 145, + "LABEL_146": 146, + 
"LABEL_147": 147, + "LABEL_148": 148, + "LABEL_149": 149, + "LABEL_15": 15, + "LABEL_150": 150, + "LABEL_151": 151, + "LABEL_152": 152, + "LABEL_153": 153, + "LABEL_154": 154, + "LABEL_155": 155, + "LABEL_156": 156, + "LABEL_157": 157, + "LABEL_158": 158, + "LABEL_159": 159, + "LABEL_16": 16, + "LABEL_160": 160, + "LABEL_161": 161, + "LABEL_162": 162, + "LABEL_163": 163, + "LABEL_164": 164, + "LABEL_165": 165, + "LABEL_166": 166, + "LABEL_167": 167, + "LABEL_168": 168, + "LABEL_169": 169, + "LABEL_17": 17, + "LABEL_170": 170, + "LABEL_171": 171, + "LABEL_172": 172, + "LABEL_173": 173, + "LABEL_174": 174, + "LABEL_175": 175, + "LABEL_176": 176, + "LABEL_177": 177, + "LABEL_178": 178, + "LABEL_179": 179, + "LABEL_18": 18, + "LABEL_180": 180, + "LABEL_181": 181, + "LABEL_182": 182, + "LABEL_183": 183, + "LABEL_184": 184, + "LABEL_185": 185, + "LABEL_186": 186, + "LABEL_187": 187, + "LABEL_188": 188, + "LABEL_189": 189, + "LABEL_19": 19, + "LABEL_190": 190, + "LABEL_191": 191, + "LABEL_192": 192, + "LABEL_193": 193, + "LABEL_194": 194, + "LABEL_195": 195, + "LABEL_196": 196, + "LABEL_197": 197, + "LABEL_198": 198, + "LABEL_199": 199, + "LABEL_2": 2, + "LABEL_20": 20, + "LABEL_200": 200, + "LABEL_201": 201, + "LABEL_202": 202, + "LABEL_203": 203, + "LABEL_204": 204, + "LABEL_205": 205, + "LABEL_206": 206, + "LABEL_207": 207, + "LABEL_208": 208, + "LABEL_209": 209, + "LABEL_21": 21, + "LABEL_210": 210, + "LABEL_211": 211, + "LABEL_212": 212, + "LABEL_213": 213, + "LABEL_214": 214, + "LABEL_215": 215, + "LABEL_216": 216, + "LABEL_217": 217, + "LABEL_218": 218, + "LABEL_219": 219, + "LABEL_22": 22, + "LABEL_220": 220, + "LABEL_221": 221, + "LABEL_222": 222, + "LABEL_223": 223, + "LABEL_224": 224, + "LABEL_225": 225, + "LABEL_226": 226, + "LABEL_227": 227, + "LABEL_228": 228, + "LABEL_229": 229, + "LABEL_23": 23, + "LABEL_230": 230, + "LABEL_231": 231, + "LABEL_232": 232, + "LABEL_233": 233, + "LABEL_234": 234, + "LABEL_235": 235, + "LABEL_236": 236, + "LABEL_237": 237, + 
"LABEL_238": 238, + "LABEL_239": 239, + "LABEL_24": 24, + "LABEL_240": 240, + "LABEL_241": 241, + "LABEL_242": 242, + "LABEL_243": 243, + "LABEL_244": 244, + "LABEL_245": 245, + "LABEL_246": 246, + "LABEL_247": 247, + "LABEL_248": 248, + "LABEL_249": 249, + "LABEL_25": 25, + "LABEL_250": 250, + "LABEL_251": 251, + "LABEL_252": 252, + "LABEL_253": 253, + "LABEL_254": 254, + "LABEL_255": 255, + "LABEL_256": 256, + "LABEL_257": 257, + "LABEL_258": 258, + "LABEL_259": 259, + "LABEL_26": 26, + "LABEL_260": 260, + "LABEL_261": 261, + "LABEL_262": 262, + "LABEL_263": 263, + "LABEL_264": 264, + "LABEL_265": 265, + "LABEL_266": 266, + "LABEL_267": 267, + "LABEL_268": 268, + "LABEL_269": 269, + "LABEL_27": 27, + "LABEL_270": 270, + "LABEL_271": 271, + "LABEL_272": 272, + "LABEL_273": 273, + "LABEL_274": 274, + "LABEL_275": 275, + "LABEL_276": 276, + "LABEL_277": 277, + "LABEL_278": 278, + "LABEL_279": 279, + "LABEL_28": 28, + "LABEL_280": 280, + "LABEL_281": 281, + "LABEL_282": 282, + "LABEL_283": 283, + "LABEL_284": 284, + "LABEL_285": 285, + "LABEL_286": 286, + "LABEL_287": 287, + "LABEL_288": 288, + "LABEL_289": 289, + "LABEL_29": 29, + "LABEL_290": 290, + "LABEL_291": 291, + "LABEL_292": 292, + "LABEL_293": 293, + "LABEL_294": 294, + "LABEL_295": 295, + "LABEL_296": 296, + "LABEL_297": 297, + "LABEL_298": 298, + "LABEL_299": 299, + "LABEL_3": 3, + "LABEL_30": 30, + "LABEL_300": 300, + "LABEL_301": 301, + "LABEL_302": 302, + "LABEL_303": 303, + "LABEL_304": 304, + "LABEL_305": 305, + "LABEL_306": 306, + "LABEL_307": 307, + "LABEL_308": 308, + "LABEL_309": 309, + "LABEL_31": 31, + "LABEL_310": 310, + "LABEL_311": 311, + "LABEL_312": 312, + "LABEL_313": 313, + "LABEL_314": 314, + "LABEL_315": 315, + "LABEL_316": 316, + "LABEL_317": 317, + "LABEL_318": 318, + "LABEL_319": 319, + "LABEL_32": 32, + "LABEL_320": 320, + "LABEL_321": 321, + "LABEL_322": 322, + "LABEL_323": 323, + "LABEL_324": 324, + "LABEL_325": 325, + "LABEL_326": 326, + "LABEL_327": 327, + "LABEL_328": 328, + 
"LABEL_329": 329, + "LABEL_33": 33, + "LABEL_330": 330, + "LABEL_331": 331, + "LABEL_332": 332, + "LABEL_333": 333, + "LABEL_334": 334, + "LABEL_335": 335, + "LABEL_336": 336, + "LABEL_337": 337, + "LABEL_338": 338, + "LABEL_339": 339, + "LABEL_34": 34, + "LABEL_340": 340, + "LABEL_341": 341, + "LABEL_342": 342, + "LABEL_343": 343, + "LABEL_344": 344, + "LABEL_345": 345, + "LABEL_346": 346, + "LABEL_347": 347, + "LABEL_348": 348, + "LABEL_349": 349, + "LABEL_35": 35, + "LABEL_350": 350, + "LABEL_351": 351, + "LABEL_352": 352, + "LABEL_353": 353, + "LABEL_354": 354, + "LABEL_355": 355, + "LABEL_356": 356, + "LABEL_357": 357, + "LABEL_358": 358, + "LABEL_359": 359, + "LABEL_36": 36, + "LABEL_360": 360, + "LABEL_361": 361, + "LABEL_362": 362, + "LABEL_363": 363, + "LABEL_364": 364, + "LABEL_365": 365, + "LABEL_366": 366, + "LABEL_367": 367, + "LABEL_368": 368, + "LABEL_369": 369, + "LABEL_37": 37, + "LABEL_370": 370, + "LABEL_371": 371, + "LABEL_372": 372, + "LABEL_373": 373, + "LABEL_374": 374, + "LABEL_375": 375, + "LABEL_376": 376, + "LABEL_377": 377, + "LABEL_378": 378, + "LABEL_379": 379, + "LABEL_38": 38, + "LABEL_380": 380, + "LABEL_381": 381, + "LABEL_382": 382, + "LABEL_383": 383, + "LABEL_384": 384, + "LABEL_385": 385, + "LABEL_386": 386, + "LABEL_387": 387, + "LABEL_388": 388, + "LABEL_389": 389, + "LABEL_39": 39, + "LABEL_390": 390, + "LABEL_391": 391, + "LABEL_392": 392, + "LABEL_393": 393, + "LABEL_394": 394, + "LABEL_395": 395, + "LABEL_396": 396, + "LABEL_397": 397, + "LABEL_398": 398, + "LABEL_399": 399, + "LABEL_4": 4, + "LABEL_40": 40, + "LABEL_400": 400, + "LABEL_401": 401, + "LABEL_402": 402, + "LABEL_403": 403, + "LABEL_404": 404, + "LABEL_405": 405, + "LABEL_406": 406, + "LABEL_407": 407, + "LABEL_408": 408, + "LABEL_409": 409, + "LABEL_41": 41, + "LABEL_410": 410, + "LABEL_411": 411, + "LABEL_412": 412, + "LABEL_413": 413, + "LABEL_414": 414, + "LABEL_415": 415, + "LABEL_416": 416, + "LABEL_417": 417, + "LABEL_418": 418, + "LABEL_419": 419, + 
"LABEL_42": 42, + "LABEL_420": 420, + "LABEL_421": 421, + "LABEL_422": 422, + "LABEL_423": 423, + "LABEL_424": 424, + "LABEL_425": 425, + "LABEL_426": 426, + "LABEL_427": 427, + "LABEL_428": 428, + "LABEL_429": 429, + "LABEL_43": 43, + "LABEL_430": 430, + "LABEL_431": 431, + "LABEL_432": 432, + "LABEL_433": 433, + "LABEL_434": 434, + "LABEL_435": 435, + "LABEL_436": 436, + "LABEL_437": 437, + "LABEL_438": 438, + "LABEL_439": 439, + "LABEL_44": 44, + "LABEL_440": 440, + "LABEL_441": 441, + "LABEL_442": 442, + "LABEL_443": 443, + "LABEL_444": 444, + "LABEL_445": 445, + "LABEL_446": 446, + "LABEL_447": 447, + "LABEL_448": 448, + "LABEL_449": 449, + "LABEL_45": 45, + "LABEL_450": 450, + "LABEL_451": 451, + "LABEL_452": 452, + "LABEL_453": 453, + "LABEL_454": 454, + "LABEL_455": 455, + "LABEL_456": 456, + "LABEL_457": 457, + "LABEL_458": 458, + "LABEL_459": 459, + "LABEL_46": 46, + "LABEL_460": 460, + "LABEL_461": 461, + "LABEL_462": 462, + "LABEL_463": 463, + "LABEL_464": 464, + "LABEL_465": 465, + "LABEL_466": 466, + "LABEL_467": 467, + "LABEL_468": 468, + "LABEL_469": 469, + "LABEL_47": 47, + "LABEL_470": 470, + "LABEL_471": 471, + "LABEL_472": 472, + "LABEL_473": 473, + "LABEL_474": 474, + "LABEL_475": 475, + "LABEL_476": 476, + "LABEL_477": 477, + "LABEL_478": 478, + "LABEL_479": 479, + "LABEL_48": 48, + "LABEL_480": 480, + "LABEL_481": 481, + "LABEL_482": 482, + "LABEL_483": 483, + "LABEL_484": 484, + "LABEL_485": 485, + "LABEL_486": 486, + "LABEL_487": 487, + "LABEL_488": 488, + "LABEL_489": 489, + "LABEL_49": 49, + "LABEL_490": 490, + "LABEL_491": 491, + "LABEL_492": 492, + "LABEL_493": 493, + "LABEL_494": 494, + "LABEL_495": 495, + "LABEL_496": 496, + "LABEL_497": 497, + "LABEL_498": 498, + "LABEL_499": 499, + "LABEL_5": 5, + "LABEL_50": 50, + "LABEL_500": 500, + "LABEL_501": 501, + "LABEL_502": 502, + "LABEL_503": 503, + "LABEL_504": 504, + "LABEL_505": 505, + "LABEL_506": 506, + "LABEL_507": 507, + "LABEL_508": 508, + "LABEL_509": 509, + "LABEL_51": 51, + 
"LABEL_510": 510, + "LABEL_511": 511, + "LABEL_512": 512, + "LABEL_513": 513, + "LABEL_514": 514, + "LABEL_515": 515, + "LABEL_516": 516, + "LABEL_517": 517, + "LABEL_518": 518, + "LABEL_519": 519, + "LABEL_52": 52, + "LABEL_520": 520, + "LABEL_521": 521, + "LABEL_522": 522, + "LABEL_523": 523, + "LABEL_524": 524, + "LABEL_525": 525, + "LABEL_526": 526, + "LABEL_527": 527, + "LABEL_528": 528, + "LABEL_529": 529, + "LABEL_53": 53, + "LABEL_530": 530, + "LABEL_531": 531, + "LABEL_532": 532, + "LABEL_533": 533, + "LABEL_534": 534, + "LABEL_535": 535, + "LABEL_536": 536, + "LABEL_537": 537, + "LABEL_538": 538, + "LABEL_539": 539, + "LABEL_54": 54, + "LABEL_540": 540, + "LABEL_541": 541, + "LABEL_542": 542, + "LABEL_543": 543, + "LABEL_544": 544, + "LABEL_545": 545, + "LABEL_546": 546, + "LABEL_547": 547, + "LABEL_548": 548, + "LABEL_549": 549, + "LABEL_55": 55, + "LABEL_550": 550, + "LABEL_551": 551, + "LABEL_552": 552, + "LABEL_553": 553, + "LABEL_554": 554, + "LABEL_555": 555, + "LABEL_556": 556, + "LABEL_557": 557, + "LABEL_558": 558, + "LABEL_559": 559, + "LABEL_56": 56, + "LABEL_560": 560, + "LABEL_561": 561, + "LABEL_562": 562, + "LABEL_563": 563, + "LABEL_564": 564, + "LABEL_565": 565, + "LABEL_566": 566, + "LABEL_567": 567, + "LABEL_568": 568, + "LABEL_569": 569, + "LABEL_57": 57, + "LABEL_570": 570, + "LABEL_571": 571, + "LABEL_572": 572, + "LABEL_573": 573, + "LABEL_574": 574, + "LABEL_575": 575, + "LABEL_576": 576, + "LABEL_577": 577, + "LABEL_578": 578, + "LABEL_579": 579, + "LABEL_58": 58, + "LABEL_580": 580, + "LABEL_581": 581, + "LABEL_582": 582, + "LABEL_583": 583, + "LABEL_584": 584, + "LABEL_585": 585, + "LABEL_586": 586, + "LABEL_587": 587, + "LABEL_588": 588, + "LABEL_589": 589, + "LABEL_59": 59, + "LABEL_590": 590, + "LABEL_591": 591, + "LABEL_592": 592, + "LABEL_593": 593, + "LABEL_594": 594, + "LABEL_595": 595, + "LABEL_596": 596, + "LABEL_597": 597, + "LABEL_598": 598, + "LABEL_599": 599, + "LABEL_6": 6, + "LABEL_60": 60, + "LABEL_600": 600, + 
"LABEL_601": 601, + "LABEL_602": 602, + "LABEL_603": 603, + "LABEL_604": 604, + "LABEL_605": 605, + "LABEL_606": 606, + "LABEL_607": 607, + "LABEL_608": 608, + "LABEL_609": 609, + "LABEL_61": 61, + "LABEL_610": 610, + "LABEL_611": 611, + "LABEL_612": 612, + "LABEL_613": 613, + "LABEL_614": 614, + "LABEL_615": 615, + "LABEL_616": 616, + "LABEL_617": 617, + "LABEL_618": 618, + "LABEL_619": 619, + "LABEL_62": 62, + "LABEL_620": 620, + "LABEL_621": 621, + "LABEL_622": 622, + "LABEL_623": 623, + "LABEL_624": 624, + "LABEL_625": 625, + "LABEL_626": 626, + "LABEL_627": 627, + "LABEL_628": 628, + "LABEL_629": 629, + "LABEL_63": 63, + "LABEL_630": 630, + "LABEL_631": 631, + "LABEL_632": 632, + "LABEL_633": 633, + "LABEL_634": 634, + "LABEL_635": 635, + "LABEL_636": 636, + "LABEL_637": 637, + "LABEL_638": 638, + "LABEL_639": 639, + "LABEL_64": 64, + "LABEL_640": 640, + "LABEL_641": 641, + "LABEL_642": 642, + "LABEL_643": 643, + "LABEL_644": 644, + "LABEL_645": 645, + "LABEL_646": 646, + "LABEL_647": 647, + "LABEL_648": 648, + "LABEL_649": 649, + "LABEL_65": 65, + "LABEL_650": 650, + "LABEL_651": 651, + "LABEL_652": 652, + "LABEL_653": 653, + "LABEL_654": 654, + "LABEL_655": 655, + "LABEL_656": 656, + "LABEL_657": 657, + "LABEL_658": 658, + "LABEL_659": 659, + "LABEL_66": 66, + "LABEL_660": 660, + "LABEL_661": 661, + "LABEL_662": 662, + "LABEL_663": 663, + "LABEL_664": 664, + "LABEL_665": 665, + "LABEL_666": 666, + "LABEL_667": 667, + "LABEL_668": 668, + "LABEL_669": 669, + "LABEL_67": 67, + "LABEL_670": 670, + "LABEL_671": 671, + "LABEL_672": 672, + "LABEL_673": 673, + "LABEL_674": 674, + "LABEL_675": 675, + "LABEL_676": 676, + "LABEL_677": 677, + "LABEL_678": 678, + "LABEL_679": 679, + "LABEL_68": 68, + "LABEL_680": 680, + "LABEL_681": 681, + "LABEL_682": 682, + "LABEL_683": 683, + "LABEL_684": 684, + "LABEL_685": 685, + "LABEL_686": 686, + "LABEL_687": 687, + "LABEL_688": 688, + "LABEL_689": 689, + "LABEL_69": 69, + "LABEL_690": 690, + "LABEL_691": 691, + "LABEL_692": 692, 
+ "LABEL_693": 693, + "LABEL_694": 694, + "LABEL_695": 695, + "LABEL_696": 696, + "LABEL_697": 697, + "LABEL_698": 698, + "LABEL_699": 699, + "LABEL_7": 7, + "LABEL_70": 70, + "LABEL_700": 700, + "LABEL_701": 701, + "LABEL_702": 702, + "LABEL_703": 703, + "LABEL_704": 704, + "LABEL_705": 705, + "LABEL_706": 706, + "LABEL_707": 707, + "LABEL_708": 708, + "LABEL_709": 709, + "LABEL_71": 71, + "LABEL_710": 710, + "LABEL_711": 711, + "LABEL_712": 712, + "LABEL_713": 713, + "LABEL_714": 714, + "LABEL_715": 715, + "LABEL_716": 716, + "LABEL_717": 717, + "LABEL_718": 718, + "LABEL_719": 719, + "LABEL_72": 72, + "LABEL_720": 720, + "LABEL_721": 721, + "LABEL_722": 722, + "LABEL_723": 723, + "LABEL_724": 724, + "LABEL_725": 725, + "LABEL_726": 726, + "LABEL_727": 727, + "LABEL_728": 728, + "LABEL_729": 729, + "LABEL_73": 73, + "LABEL_730": 730, + "LABEL_731": 731, + "LABEL_732": 732, + "LABEL_733": 733, + "LABEL_734": 734, + "LABEL_735": 735, + "LABEL_736": 736, + "LABEL_737": 737, + "LABEL_738": 738, + "LABEL_739": 739, + "LABEL_74": 74, + "LABEL_740": 740, + "LABEL_741": 741, + "LABEL_742": 742, + "LABEL_743": 743, + "LABEL_744": 744, + "LABEL_745": 745, + "LABEL_746": 746, + "LABEL_747": 747, + "LABEL_748": 748, + "LABEL_749": 749, + "LABEL_75": 75, + "LABEL_750": 750, + "LABEL_751": 751, + "LABEL_752": 752, + "LABEL_753": 753, + "LABEL_754": 754, + "LABEL_755": 755, + "LABEL_756": 756, + "LABEL_757": 757, + "LABEL_758": 758, + "LABEL_759": 759, + "LABEL_76": 76, + "LABEL_760": 760, + "LABEL_761": 761, + "LABEL_762": 762, + "LABEL_763": 763, + "LABEL_764": 764, + "LABEL_765": 765, + "LABEL_766": 766, + "LABEL_767": 767, + "LABEL_768": 768, + "LABEL_769": 769, + "LABEL_77": 77, + "LABEL_770": 770, + "LABEL_771": 771, + "LABEL_772": 772, + "LABEL_773": 773, + "LABEL_774": 774, + "LABEL_775": 775, + "LABEL_776": 776, + "LABEL_777": 777, + "LABEL_778": 778, + "LABEL_779": 779, + "LABEL_78": 78, + "LABEL_780": 780, + "LABEL_781": 781, + "LABEL_782": 782, + "LABEL_783": 783, + 
"LABEL_784": 784, + "LABEL_785": 785, + "LABEL_786": 786, + "LABEL_787": 787, + "LABEL_788": 788, + "LABEL_789": 789, + "LABEL_79": 79, + "LABEL_790": 790, + "LABEL_791": 791, + "LABEL_792": 792, + "LABEL_793": 793, + "LABEL_794": 794, + "LABEL_795": 795, + "LABEL_796": 796, + "LABEL_797": 797, + "LABEL_798": 798, + "LABEL_799": 799, + "LABEL_8": 8, + "LABEL_80": 80, + "LABEL_800": 800, + "LABEL_801": 801, + "LABEL_802": 802, + "LABEL_803": 803, + "LABEL_804": 804, + "LABEL_805": 805, + "LABEL_806": 806, + "LABEL_807": 807, + "LABEL_808": 808, + "LABEL_809": 809, + "LABEL_81": 81, + "LABEL_810": 810, + "LABEL_811": 811, + "LABEL_812": 812, + "LABEL_813": 813, + "LABEL_814": 814, + "LABEL_815": 815, + "LABEL_816": 816, + "LABEL_817": 817, + "LABEL_818": 818, + "LABEL_819": 819, + "LABEL_82": 82, + "LABEL_820": 820, + "LABEL_821": 821, + "LABEL_822": 822, + "LABEL_823": 823, + "LABEL_824": 824, + "LABEL_825": 825, + "LABEL_826": 826, + "LABEL_827": 827, + "LABEL_828": 828, + "LABEL_829": 829, + "LABEL_83": 83, + "LABEL_830": 830, + "LABEL_831": 831, + "LABEL_832": 832, + "LABEL_833": 833, + "LABEL_834": 834, + "LABEL_835": 835, + "LABEL_836": 836, + "LABEL_837": 837, + "LABEL_838": 838, + "LABEL_839": 839, + "LABEL_84": 84, + "LABEL_840": 840, + "LABEL_841": 841, + "LABEL_842": 842, + "LABEL_843": 843, + "LABEL_844": 844, + "LABEL_845": 845, + "LABEL_846": 846, + "LABEL_847": 847, + "LABEL_848": 848, + "LABEL_849": 849, + "LABEL_85": 85, + "LABEL_850": 850, + "LABEL_851": 851, + "LABEL_852": 852, + "LABEL_853": 853, + "LABEL_854": 854, + "LABEL_855": 855, + "LABEL_856": 856, + "LABEL_857": 857, + "LABEL_858": 858, + "LABEL_859": 859, + "LABEL_86": 86, + "LABEL_860": 860, + "LABEL_861": 861, + "LABEL_862": 862, + "LABEL_863": 863, + "LABEL_864": 864, + "LABEL_865": 865, + "LABEL_866": 866, + "LABEL_867": 867, + "LABEL_868": 868, + "LABEL_869": 869, + "LABEL_87": 87, + "LABEL_870": 870, + "LABEL_871": 871, + "LABEL_872": 872, + "LABEL_873": 873, + "LABEL_874": 874, + 
"LABEL_875": 875, + "LABEL_876": 876, + "LABEL_877": 877, + "LABEL_878": 878, + "LABEL_879": 879, + "LABEL_88": 88, + "LABEL_880": 880, + "LABEL_881": 881, + "LABEL_882": 882, + "LABEL_883": 883, + "LABEL_884": 884, + "LABEL_885": 885, + "LABEL_886": 886, + "LABEL_887": 887, + "LABEL_888": 888, + "LABEL_889": 889, + "LABEL_89": 89, + "LABEL_890": 890, + "LABEL_891": 891, + "LABEL_892": 892, + "LABEL_893": 893, + "LABEL_894": 894, + "LABEL_895": 895, + "LABEL_896": 896, + "LABEL_897": 897, + "LABEL_898": 898, + "LABEL_899": 899, + "LABEL_9": 9, + "LABEL_90": 90, + "LABEL_900": 900, + "LABEL_901": 901, + "LABEL_902": 902, + "LABEL_903": 903, + "LABEL_904": 904, + "LABEL_905": 905, + "LABEL_906": 906, + "LABEL_907": 907, + "LABEL_908": 908, + "LABEL_909": 909, + "LABEL_91": 91, + "LABEL_910": 910, + "LABEL_911": 911, + "LABEL_912": 912, + "LABEL_913": 913, + "LABEL_914": 914, + "LABEL_915": 915, + "LABEL_916": 916, + "LABEL_917": 917, + "LABEL_918": 918, + "LABEL_919": 919, + "LABEL_92": 92, + "LABEL_920": 920, + "LABEL_921": 921, + "LABEL_922": 922, + "LABEL_923": 923, + "LABEL_924": 924, + "LABEL_925": 925, + "LABEL_926": 926, + "LABEL_927": 927, + "LABEL_928": 928, + "LABEL_929": 929, + "LABEL_93": 93, + "LABEL_930": 930, + "LABEL_931": 931, + "LABEL_932": 932, + "LABEL_933": 933, + "LABEL_934": 934, + "LABEL_935": 935, + "LABEL_936": 936, + "LABEL_937": 937, + "LABEL_938": 938, + "LABEL_939": 939, + "LABEL_94": 94, + "LABEL_940": 940, + "LABEL_941": 941, + "LABEL_942": 942, + "LABEL_943": 943, + "LABEL_944": 944, + "LABEL_945": 945, + "LABEL_946": 946, + "LABEL_947": 947, + "LABEL_948": 948, + "LABEL_949": 949, + "LABEL_95": 95, + "LABEL_950": 950, + "LABEL_951": 951, + "LABEL_952": 952, + "LABEL_953": 953, + "LABEL_954": 954, + "LABEL_955": 955, + "LABEL_956": 956, + "LABEL_957": 957, + "LABEL_958": 958, + "LABEL_959": 959, + "LABEL_96": 96, + "LABEL_960": 960, + "LABEL_961": 961, + "LABEL_962": 962, + "LABEL_963": 963, + "LABEL_964": 964, + "LABEL_965": 965, + 
"LABEL_966": 966, + "LABEL_967": 967, + "LABEL_968": 968, + "LABEL_969": 969, + "LABEL_97": 97, + "LABEL_970": 970, + "LABEL_971": 971, + "LABEL_972": 972, + "LABEL_973": 973, + "LABEL_974": 974, + "LABEL_975": 975, + "LABEL_976": 976, + "LABEL_977": 977, + "LABEL_978": 978, + "LABEL_979": 979, + "LABEL_98": 98, + "LABEL_980": 980, + "LABEL_981": 981, + "LABEL_982": 982, + "LABEL_983": 983, + "LABEL_984": 984, + "LABEL_985": 985, + "LABEL_986": 986, + "LABEL_987": 987, + "LABEL_988": 988, + "LABEL_989": 989, + "LABEL_99": 99, + "LABEL_990": 990, + "LABEL_991": 991, + "LABEL_992": 992, + "LABEL_993": 993, + "LABEL_994": 994, + "LABEL_995": 995, + "LABEL_996": 996, + "LABEL_997": 997, + "LABEL_998": 998, + "LABEL_999": 999 + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.05, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "max_bucket_distance": 800, + "model_type": "wavlm", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_buckets": 320, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_ctc_classes": 80, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "transformers_version": "4.33.2", + "use_weighted_layer_sum": true, + 
"vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/wavlm-base-sv/onnx/model.onnx b/wavlm-base-sv/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5177e7c9fd892e49943ca71262f3a315a5341a90 --- /dev/null +++ b/wavlm-base-sv/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58dd1efb7e1e34ad249653f6fd9aa7bec2833498bde8dc09b6f8e9f10f93ac67 +size 402471430 diff --git a/wavlm-base-sv/onnx/model_quantized.onnx b/wavlm-base-sv/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..97a8ab0b3aae2cc1113d0ea1c0ca9ff7509a9a4c --- /dev/null +++ b/wavlm-base-sv/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9722d3990c9244dd7990be48508440e3cc7476fa101011c516d36b1f3b260043 +size 101683444 diff --git a/wavlm-base-sv/preprocessor_config.json b/wavlm-base-sv/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10f6def8c83d70a2b087a567dcf523b75152a80b --- /dev/null +++ b/wavlm-base-sv/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wavlm-base-sv/quantize_config.json b/wavlm-base-sv/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1cb246a93098d7677c0228534f07fb88e55bb9a5 --- /dev/null +++ b/wavlm-base-sv/quantize_config.json @@ -0,0 +1,48 @@ +{ + "per_channel": false, + "reduce_range": false, + "per_model_config": { + "model": { + "op_types": [ + "Conv", + "Where", + "Relu", + "Reshape", + "Less", + "Add", + "Min", + "Sub", + "Slice", + "Shape", + "Greater", + "Tile", + "Abs", + "Gemm", + "Pow", + "Expand", + "Sigmoid", + "ReduceMean", + "Transpose", + "ConstantOfShape", + "Softmax", + "Range", + "Erf", + "Cast", + 
"Div", + "Sqrt", + "Constant", + "Pad", + "ReduceProd", + "MatMul", + "Gather", + "Unsqueeze", + "Concat", + "Log", + "InstanceNormalization", + "ReduceSum", + "Mul" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/wavlm-base-sv/source.txt b/wavlm-base-sv/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e71be8fa0d942129d5cdd64d1731af94a957124 --- /dev/null +++ b/wavlm-base-sv/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/wavlm-base-sv \ No newline at end of file diff --git a/wavlm-base/.gitattributes b/wavlm-base/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/wavlm-base/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar 
filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/wavlm-base/README.md b/wavlm-base/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dae5f96ee333500da23932f2fd54f10abe3b14b0 --- /dev/null +++ b/wavlm-base/README.md @@ -0,0 +1,8 @@ +--- +base_model: microsoft/wavlm-base +library_name: transformers.js +--- + +https://huggingface.co/microsoft/wavlm-base with ONNX weights to be compatible with Transformers.js. + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/wavlm-base/config.json b/wavlm-base/config.json new file mode 100644 index 0000000000000000000000000000000000000000..658a594ed879a5b1defa06063cc3a7283cbb2ac5 --- /dev/null +++ b/wavlm-base/config.json @@ -0,0 +1,120 @@ +{ + "_name_or_path": "microsoft/wavlm-base", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMModel" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.05, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "max_bucket_distance": 800, + "model_type": "wavlm", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_buckets": 320, + "num_codevector_groups": 2, + 
"num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_ctc_classes": 80, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "transformers_version": "4.33.2", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/wavlm-base/onnx/model.onnx b/wavlm-base/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..659cc996123313f5ad7f0442a57b9f88f2d4025d --- /dev/null +++ b/wavlm-base/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eebf9b34186c2fedb8f85064b8644ce54d4a71a09655edc82dc9b339bdcf1058 +size 377935358 diff --git a/wavlm-base/onnx/model_fp16.onnx b/wavlm-base/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2c9cf6fb521b09651c5c200f8e7e6d84a2018fd2 --- /dev/null +++ b/wavlm-base/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b6a7a86d819ed45bc35efdfe1ead1aca108b00bf50438441d5d642fe3782406 +size 189308046 diff --git a/wavlm-base/onnx/model_quantized.onnx b/wavlm-base/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e34728dbd90ca4900eadc95b404375c8d3c9ef79 --- /dev/null +++ b/wavlm-base/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52a486ac131f07185022a37f2ee7ebd575261e199745c40bb001a53dc5468e1 +size 95421860 diff --git a/wavlm-base/preprocessor_config.json b/wavlm-base/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10f6def8c83d70a2b087a567dcf523b75152a80b --- 
/dev/null +++ b/wavlm-base/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wavlm-base/quantize_config.json b/wavlm-base/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..34e36915b1dcdb25c1ec625dd9353d8a70218db6 --- /dev/null +++ b/wavlm-base/quantize_config.json @@ -0,0 +1,45 @@ +{ + "per_channel": false, + "reduce_range": false, + "per_model_config": { + "model": { + "op_types": [ + "MatMul", + "Expand", + "Min", + "ReduceSum", + "Slice", + "ReduceMean", + "Abs", + "Less", + "Add", + "Sqrt", + "Tile", + "Where", + "Greater", + "Gather", + "Transpose", + "Constant", + "Log", + "InstanceNormalization", + "Div", + "Reshape", + "Mul", + "Concat", + "Sub", + "Shape", + "Softmax", + "Gemm", + "Unsqueeze", + "Erf", + "Sigmoid", + "Conv", + "Range", + "ConstantOfShape", + "Pow", + "Cast" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/wavlm-base/source.txt b/wavlm-base/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..e19263aeb800ce6caaf79194ec9081546aa93e71 --- /dev/null +++ b/wavlm-base/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/wavlm-base \ No newline at end of file diff --git a/wavlm-large-mnn/.gitattributes b/wavlm-large-mnn/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..6f87174c2a7b3939f92358e93bb88335fb580c8e --- /dev/null +++ b/wavlm-large-mnn/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs 
diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +wavlm_large_int8.mnn filter=lfs diff=lfs merge=lfs -text +wavlm_large_fp16.mnn filter=lfs diff=lfs merge=lfs -text diff --git a/wavlm-large-mnn/README.md b/wavlm-large-mnn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3a81ea4526f4aa990c3043ef286990c4662ba4b5 --- /dev/null +++ b/wavlm-large-mnn/README.md @@ -0,0 +1,10 @@ +--- +license: apache-2.0 +tags: +- audio +language: +- zh +--- +WavLM-Large MNN Version. 
+ +More details: https://zhuanlan.zhihu.com/p/1894282074397057031 \ No newline at end of file diff --git a/wavlm-large-mnn/source.txt b/wavlm-large-mnn/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..1df4e5a19adfdcdc0e025bab1d0695d42dcb47c4 --- /dev/null +++ b/wavlm-large-mnn/source.txt @@ -0,0 +1 @@ +https://huggingface.co/yunfengwang/wavlm-large-mnn \ No newline at end of file diff --git a/wavlm-large-mnn/wavlm_large_fp16.mnn b/wavlm-large-mnn/wavlm_large_fp16.mnn new file mode 100644 index 0000000000000000000000000000000000000000..41a156ff92b30baab91537cbddf954556c8c3148 --- /dev/null +++ b/wavlm-large-mnn/wavlm_large_fp16.mnn @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:515a5aa20c57766ffb0ba9249b450fb1a1edc924bf4e3624da814576853cf331 +size 649637356 diff --git a/wavlm-large-mnn/wavlm_large_int8.mnn b/wavlm-large-mnn/wavlm_large_int8.mnn new file mode 100644 index 0000000000000000000000000000000000000000..43711d284b7fb222eacdd1f015b3f36b4dd0b703 --- /dev/null +++ b/wavlm-large-mnn/wavlm_large_int8.mnn @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1764f3b7e01da21bf6e5aef23448c59acd3e197b867158c2bb74819693bb78 +size 327161228 diff --git a/wavlm-large-onnx/.gitattributes b/wavlm-large-onnx/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/wavlm-large-onnx/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs 
diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/wavlm-large-onnx/README.md b/wavlm-large-onnx/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5fd933f237a1477dcd03745d5680df00b9614fb9 --- /dev/null +++ b/wavlm-large-onnx/README.md @@ -0,0 +1,7 @@ +--- +license: apache-2.0 +--- + +WavLM-Large ONNX Version. 
+ +More details: https://zhuanlan.zhihu.com/p/1894282074397057031 \ No newline at end of file diff --git a/wavlm-large-onnx/source.txt b/wavlm-large-onnx/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..7af217a6361457b71b47ea8a8aa4fb0630061101 --- /dev/null +++ b/wavlm-large-onnx/source.txt @@ -0,0 +1 @@ +https://huggingface.co/yunfengwang/wavlm-large-onnx \ No newline at end of file diff --git a/wavlm-large-onnx/wavlm_large.onnx b/wavlm-large-onnx/wavlm_large.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e027f13df9d53b6493084cb77a56ad0084caec26 --- /dev/null +++ b/wavlm-large-onnx/wavlm_large.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bddde594534af5540500747270dda7dec6bf743788b00ee1199581011e9cb48 +size 1296712717 diff --git a/wavlm-large/.gitattributes b/wavlm-large/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/wavlm-large/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt 
filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/wavlm-large/README.md b/wavlm-large/README.md new file mode 100644 index 0000000000000000000000000000000000000000..736f078eb5f88a0b768ad239e3ef22a02a248436 --- /dev/null +++ b/wavlm-large/README.md @@ -0,0 +1,8 @@ +--- +base_model: microsoft/wavlm-large +library_name: transformers.js +--- + +https://huggingface.co/microsoft/wavlm-large with ONNX weights to be compatible with Transformers.js. + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/wavlm-large/config.json b/wavlm-large/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cdfecb8bf40f949cd8d8764d5e427ec768ce8db4 --- /dev/null +++ b/wavlm-large/config.json @@ -0,0 +1,120 @@ +{ + "_name_or_path": "microsoft/wavlm-large", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMModel" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 768, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.075, + "mask_time_selection": "static", + "max_bucket_distance": 800, + "model_type": "wavlm", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_buckets": 320, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + 
"num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_ctc_classes": 80, + "num_feat_extract_layers": 7, + "num_hidden_layers": 24, + "num_negatives": 100, + "output_hidden_size": 1024, + "pad_token_id": 0, + "proj_codevector_dim": 768, + "replace_prob": 0.5, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "transformers_version": "4.33.2", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/wavlm-large/onnx/model.onnx b/wavlm-large/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..800346d5572aa29f50a70f4ab39f0eacfea3b513 --- /dev/null +++ b/wavlm-large/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315a962fde21c5086cee3badfd0c87dbd926383ee84f6be746e5b98707a7e267 +size 1262612914 diff --git a/wavlm-large/onnx/model_fp16.onnx b/wavlm-large/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9b1ee2727fdb5b70c22a93677a2122f8311b8e62 --- /dev/null +++ b/wavlm-large/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:648440d0c164054b7c093fcb2389f548299f1eaf58ced1905b041900868c19ba +size 631983579 diff --git a/wavlm-large/onnx/model_quantized.onnx b/wavlm-large/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..065bf98bbefda0e4b31dfa1c99a073bc8c34be8c --- /dev/null +++ b/wavlm-large/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d70a5548cd9139ccc9f3285941c3b6fcaf6bd53d1834bcbf10da4f9d22431914 +size 317752143 diff --git a/wavlm-large/preprocessor_config.json b/wavlm-large/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73caa151574001d3d495fae897e1d38968249712 --- 
/dev/null +++ b/wavlm-large/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wavlm-large/quantize_config.json b/wavlm-large/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3569254ad36eda83d64059f09296a0cd5266ae0e --- /dev/null +++ b/wavlm-large/quantize_config.json @@ -0,0 +1,44 @@ +{ + "per_channel": false, + "reduce_range": false, + "per_model_config": { + "model": { + "op_types": [ + "Sqrt", + "Slice", + "Conv", + "Mul", + "Shape", + "Cast", + "Softmax", + "Tile", + "Div", + "ConstantOfShape", + "Sub", + "Add", + "Range", + "Gemm", + "Greater", + "Erf", + "Less", + "Unsqueeze", + "Min", + "Reshape", + "Expand", + "Constant", + "ReduceMean", + "MatMul", + "Abs", + "Where", + "Gather", + "Pow", + "Concat", + "ReduceSum", + "Sigmoid", + "Transpose", + "Log" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/wavlm-large/source.txt b/wavlm-large/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..0b49b10464b0528d3481820a9d5d362b9c3e7315 --- /dev/null +++ b/wavlm-large/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/wavlm-large \ No newline at end of file