diff --git a/AudioEmoDetect_v1_onnx/.gitattributes b/AudioEmoDetect_v1_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/AudioEmoDetect_v1_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.config b/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.config
new file mode 100644
index 0000000000000000000000000000000000000000..72e24d5e47258a0cb018571a0b97249ffa9e03d7
--- /dev/null
+++ b/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.config
@@ -0,0 +1,93 @@
+HubertConfig {
+ "_name_or_path": "PrachiPatel/AudioEmoDetect_v1",
+ "activation_dropout": 0.1,
+ "apply_spec_augment": true,
+ "architectures": [
+ "HubertForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "conv_bias": false,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "do_stable_layer_norm": false,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.1,
+ "feat_proj_layer_norm": true,
+ "final_dropout": 0.1,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "Anger",
+ "1": "Disgust",
+ "2": "Fear",
+ "3": "Happiness",
+ "4": "Neutral",
+ "5": "Sadness"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2,
+ "LABEL_3": 3,
+ "LABEL_4": 4,
+ "LABEL_5": 5
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.1,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "model_type": "hubert",
+ "num_attention_heads": 12,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "pooling_mode": "mean",
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32
+}
diff --git a/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.feature b/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.feature
new file mode 100644
index 0000000000000000000000000000000000000000..dc9d64b4acf34285cec979c8b63084b2f4bf6790
--- /dev/null
+++ b/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0,
+ "return_attention_mask": false,
+ "sampling_rate": 16000
+}
diff --git a/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.onnx b/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..7aef5fce99f332deeb60cf857e6b748ebc0b1011
--- /dev/null
+++ b/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b62b4b3e7290be70d72440cf70765005d675695243a03a5781ede4678c05111
+size 378573700
diff --git a/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.yaml b/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13f7323f49c23d814d105f939cc866fea1e61206
--- /dev/null
+++ b/AudioEmoDetect_v1_onnx/AudioEmoDetect_v1.yaml
@@ -0,0 +1,14 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: PrachiPatel/AudioEmoDetect_v1
+labels:
+ 0: Anger
+ 1: Disgust
+ 2: Fear
+ 3: Happiness
+ 4: Neutral
+ 5: Sadness
+model: HubertForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/AudioEmoDetect_v1_onnx/source.txt b/AudioEmoDetect_v1_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..de39cdb3691345098107615783662f1b248f15b1
--- /dev/null
+++ b/AudioEmoDetect_v1_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/AudioEmoDetect_v1_onnx
\ No newline at end of file
diff --git a/audio-emotion-detection-onnx/.gitattributes b/audio-emotion-detection-onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/audio-emotion-detection-onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/audio-emotion-detection-onnx/README.md b/audio-emotion-detection-onnx/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7b95401dc46245ac339fc25059d4a56d90b4cde5
--- /dev/null
+++ b/audio-emotion-detection-onnx/README.md
@@ -0,0 +1,3 @@
+---
+license: apache-2.0
+---
diff --git a/audio-emotion-detection-onnx/config.json b/audio-emotion-detection-onnx/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..83af6f285bfbfa705efa515438fa31bf4a52bcac
--- /dev/null
+++ b/audio-emotion-detection-onnx/config.json
@@ -0,0 +1,133 @@
+{
+ "_name_or_path": "Hatman/audio-emotion-detection",
+ "activation_dropout": 0.05,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "mean",
+ "ctc_zero_infinity": true,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.05,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.05,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "Angry",
+ "1": "Disgusted",
+ "2": "Fearful",
+ "3": "Happy",
+ "4": "Neutral",
+ "5": "Sad",
+ "6": "Suprised"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "Angry": "0",
+ "Disgusted": "1",
+ "Fearful": "2",
+ "Happy": "3",
+ "Neutral": "4",
+ "Sad": "5",
+ "Suprised": "6"
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.05,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.05,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 24,
+ "num_negatives": 100,
+ "output_hidden_size": 1024,
+ "pad_token_id": 0,
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "transformers_version": "4.37.2",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 33,
+ "xvector_output_dim": 512
+}
diff --git a/audio-emotion-detection-onnx/onnx/model_quantized.onnx b/audio-emotion-detection-onnx/onnx/model_quantized.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..a5b70f57cdb76544d81cd1d27dfe9ac127cdd915
--- /dev/null
+++ b/audio-emotion-detection-onnx/onnx/model_quantized.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08a04319bc2a171a948f23375fb2397c7f99c4fbd9a984325c0ad9082964d8b5
+size 1263408071
diff --git a/audio-emotion-detection-onnx/preprocessor_config.json b/audio-emotion-detection-onnx/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9f99bcabcbeaf80e6791d79c9cb6cd68c6e7ae95
--- /dev/null
+++ b/audio-emotion-detection-onnx/preprocessor_config.json
@@ -0,0 +1,10 @@
+{
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "processor_class": "Wav2Vec2ProcessorWithLM",
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
diff --git a/audio-emotion-detection-onnx/source.txt b/audio-emotion-detection-onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cbcc0c0f91e672f5d135ca6ca3fbbd83ee066f87
--- /dev/null
+++ b/audio-emotion-detection-onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/Aroganta/audio-emotion-detection-onnx
\ No newline at end of file
diff --git a/unispeech-sat-emotion-russian-resd_onnx/.gitattributes b/unispeech-sat-emotion-russian-resd_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/unispeech-sat-emotion-russian-resd_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/unispeech-sat-emotion-russian-resd_onnx/source.txt b/unispeech-sat-emotion-russian-resd_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c086214a3720a0f38e609241376fbf3cb3235d7e
--- /dev/null
+++ b/unispeech-sat-emotion-russian-resd_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/unispeech-sat-emotion-russian-resd_onnx
\ No newline at end of file
diff --git a/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.config b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.config
new file mode 100644
index 0000000000000000000000000000000000000000..1bde6d3305ac513fdd85c38689edeb5251f51d4d
--- /dev/null
+++ b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.config
@@ -0,0 +1,127 @@
+UniSpeechSatConfig {
+ "_name_or_path": "Aniemore/unispeech-sat-emotion-russian-resd",
+ "activation_dropout": 0.05,
+ "apply_spec_augment": true,
+ "architectures": [
+ "UniSpeechSatForSequenceClassification"
+ ],
+ "attention_dropout": 0.05,
+ "bos_token_id": 1,
+ "classifier_proj_size": 768,
+ "codevector_dim": 768,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": false,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.05,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.05,
+ "finetuning_task": [
+ "unispeech_sat_classification"
+ ],
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.05,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "anger",
+ "1": "disgust",
+ "2": "enthusiasm",
+ "3": "fear",
+ "4": "happiness",
+ "5": "neutral",
+ "6": "sadness"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "anger": 0,
+ "disgust": 1,
+ "enthusiasm": 2,
+ "fear": 3,
+ "happiness": 4,
+ "neutral": 5,
+ "sadness": 6
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.05,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "model_type": "unispeech-sat",
+ "num_attention_heads": 16,
+ "num_clusters": 504,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 24,
+ "num_negatives": 100,
+ "pad_token_id": 0,
+ "pooling_mode": "mean",
+ "problem_type": "single_label_classification",
+ "proj_codevector_dim": 768,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 40,
+ "xvector_output_dim": 512
+}
diff --git a/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.feature b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.feature
new file mode 100644
index 0000000000000000000000000000000000000000..7c6e5bc4c80dd64242e48f6297826d4fa682abd5
--- /dev/null
+++ b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
diff --git a/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.onnx b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..28c659fc8a07a6ddbc367772b4a233f9664d4bec
--- /dev/null
+++ b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c52c43e1933d1be1f8c2e1e10c4cf63f3f66802453d1764b374f0bd9e2cc6e91
+size 1265552756
diff --git a/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.processor b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.processor
new file mode 100644
index 0000000000000000000000000000000000000000..66aeb17a20c5744a591f46b492197977397ddb7c
--- /dev/null
+++ b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.processor
@@ -0,0 +1,21 @@
+Wav2Vec2Processor:
+- feature_extractor: Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
+
+- tokenizer: Wav2Vec2CTCTokenizer(name_or_path='Aniemore/unispeech-sat-emotion-russian-resd', vocab_size=40, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': '', 'pad_token': ''}, clean_up_tokenization_spaces=True), added_tokens_decoder={
+ 0: AddedToken("", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+ 1: AddedToken("", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+ 2: AddedToken("", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+ 3: AddedToken("", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+}
+
+{
+ "processor_class": "Wav2Vec2Processor"
+}
diff --git a/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.tokens b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.tokens
new file mode 100644
index 0000000000000000000000000000000000000000..105919a969361bd2994f735cede694770caad511
--- /dev/null
+++ b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.tokens
@@ -0,0 +1,40 @@
+0:
+1:
+2:
+3:
+4: '|'
+5: ''''
+6: '-'
+7: а
+8: б
+9: в
+10: г
+11: д
+12: е
+13: ж
+14: з
+15: и
+16: й
+17: к
+18: л
+19: м
+20: н
+21: о
+22: п
+23: р
+24: с
+25: т
+26: у
+27: ф
+28: х
+29: ц
+30: ч
+31: ш
+32: щ
+33: ъ
+34: ы
+35: ь
+36: э
+37: ю
+38: я
+39: ё
diff --git a/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.yaml b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0aed8834740163724c00038f0aa9bbb78d1573ea
--- /dev/null
+++ b/unispeech-sat-emotion-russian-resd_onnx/unispeech-sat-emotion-russian-resd.yaml
@@ -0,0 +1,15 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: Aniemore/unispeech-sat-emotion-russian-resd
+labels:
+ 0: anger
+ 1: disgust
+ 2: enthusiasm
+ 3: fear
+ 4: happiness
+ 5: neutral
+ 6: sadness
+model: UniSpeechSatForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/urdu-audio-emotions_onnx/.gitattributes b/urdu-audio-emotions_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/urdu-audio-emotions_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/urdu-audio-emotions_onnx/source.txt b/urdu-audio-emotions_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..67a021025226e653e1c2f4166983ad72838f9d8b
--- /dev/null
+++ b/urdu-audio-emotions_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/urdu-audio-emotions_onnx
\ No newline at end of file
diff --git a/urdu-audio-emotions_onnx/urdu-audio-emotions.config b/urdu-audio-emotions_onnx/urdu-audio-emotions.config
new file mode 100644
index 0000000000000000000000000000000000000000..c8d2f22f8f2da995eb29b4cabe34bcc7a49e152f
--- /dev/null
+++ b/urdu-audio-emotions_onnx/urdu-audio-emotions.config
@@ -0,0 +1,129 @@
+Wav2Vec2Config {
+ "_name_or_path": "Talha/urdu-audio-emotions",
+ "activation_dropout": 0.0,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 768,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.1,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "Angry",
+ "1": "Happy",
+ "2": "Neutral",
+ "3": "Sad"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "Angry": "0",
+ "Happy": "1",
+ "Neutral": "2",
+ "Sad": "3"
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.1,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.075,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 24,
+ "num_negatives": 100,
+ "output_hidden_size": 1024,
+ "pad_token_id": 0,
+ "proj_codevector_dim": 768,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32,
+ "xvector_output_dim": 512
+}
diff --git a/urdu-audio-emotions_onnx/urdu-audio-emotions.feature b/urdu-audio-emotions_onnx/urdu-audio-emotions.feature
new file mode 100644
index 0000000000000000000000000000000000000000..7c6e5bc4c80dd64242e48f6297826d4fa682abd5
--- /dev/null
+++ b/urdu-audio-emotions_onnx/urdu-audio-emotions.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
diff --git a/urdu-audio-emotions_onnx/urdu-audio-emotions.onnx b/urdu-audio-emotions_onnx/urdu-audio-emotions.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..f1b9d74108ff2be32daf241ca98a2128ca3b950b
--- /dev/null
+++ b/urdu-audio-emotions_onnx/urdu-audio-emotions.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7623de49f735de6bd611721738c495575192efa1bd4ac04bf8409d0e9a78d2d5
+size 1263404950
diff --git a/urdu-audio-emotions_onnx/urdu-audio-emotions.yaml b/urdu-audio-emotions_onnx/urdu-audio-emotions.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3f0ecf3229567b715f1456f55e2d81bd7ba65b40
--- /dev/null
+++ b/urdu-audio-emotions_onnx/urdu-audio-emotions.yaml
@@ -0,0 +1,12 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: Talha/urdu-audio-emotions
+labels:
+ 0: Angry
+ 1: Happy
+ 2: Neutral
+ 3: Sad
+model: Wav2Vec2ForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/w2v2c_ko_emotion_onnx/.gitattributes b/w2v2c_ko_emotion_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/w2v2c_ko_emotion_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/w2v2c_ko_emotion_onnx/source.txt b/w2v2c_ko_emotion_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2fea633511f52c03b41a3cd9d08bb9e5a266ec05
--- /dev/null
+++ b/w2v2c_ko_emotion_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/w2v2c_ko_emotion_onnx
\ No newline at end of file
diff --git a/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.config b/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.config
new file mode 100644
index 0000000000000000000000000000000000000000..297be57dd58fe68d32af5a5b9fbcfaf9461a0e0f
--- /dev/null
+++ b/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.config
@@ -0,0 +1,124 @@
+Wav2Vec2ConformerConfig {
+ "_name_or_path": "JUNGWJ/w2v2c_ko_emotion",
+ "activation_dropout": 0.1,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ConformerForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "conformer_conv_dropout": 0.1,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": false,
+ "conv_depthwise_kernel_size": 31,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "mean",
+ "ctc_zero_infinity": true,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.0,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.1,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "Happy",
+ "1": "Sad",
+ "2": "Angry",
+ "3": "Anxious"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "Angry": "2",
+ "Anxious": "3",
+ "Happy": "0",
+ "Sad": "1"
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.0,
+ "mask_feature_length": 64,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.05,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "max_source_positions": 5000,
+ "model_type": "wav2vec2-conformer",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 12,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 12,
+ "num_negatives": 100,
+ "output_hidden_size": 768,
+ "pad_token_id": 0,
+ "position_embeddings_type": "relative",
+ "proj_codevector_dim": 256,
+ "rotary_embedding_base": 10000,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 111,
+ "xvector_output_dim": 512
+}
diff --git a/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.feature b/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.feature
new file mode 100644
index 0000000000000000000000000000000000000000..0be68ab30a4dc0a635948b99096fa6887785ddb2
--- /dev/null
+++ b/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
diff --git a/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.onnx b/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..ff1d139233d3dd9225e7be35dc9e05ca84184935
--- /dev/null
+++ b/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e094a84c8c33b9dd3d9b7d8e8a83d42e30b2ae9fe51bc682ce33a12b20ab663b
+size 732226369
diff --git a/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.yaml b/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..774d851689e164df9823d9e854e7248b68f8e504
--- /dev/null
+++ b/w2v2c_ko_emotion_onnx/w2v2c_ko_emotion.yaml
@@ -0,0 +1,12 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: JUNGWJ/w2v2c_ko_emotion
+labels:
+ 0: Happy
+ 1: Sad
+ 2: Angry
+ 3: Anxious
+model: Wav2Vec2ConformerForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/wav2vec2-base-Speech_Emotion_Recognition_onnx/.gitattributes b/wav2vec2-base-Speech_Emotion_Recognition_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/wav2vec2-base-Speech_Emotion_Recognition_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/wav2vec2-base-Speech_Emotion_Recognition_onnx/source.txt b/wav2vec2-base-Speech_Emotion_Recognition_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dfe49903c0e7c4182e5e5a83be2897abef8e9013
--- /dev/null
+++ b/wav2vec2-base-Speech_Emotion_Recognition_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/wav2vec2-base-Speech_Emotion_Recognition_onnx
\ No newline at end of file
diff --git a/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.config b/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.config
new file mode 100644
index 0000000000000000000000000000000000000000..4c61f0b9f3fe69c6d90eb5833d74ff2b6d47fd61
--- /dev/null
+++ b/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.config
@@ -0,0 +1,134 @@
+Wav2Vec2Config {
+ "_name_or_path": "DunnBC22/wav2vec2-base-Speech_Emotion_Recognition",
+ "activation_dropout": 0.0,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": false,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": false,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.1,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "freeze_feat_extract_train": true,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "SAD",
+ "1": "ANGRY",
+ "2": "DISGUST",
+ "3": "FEAR",
+ "4": "HAPPY",
+ "5": "NEUTRAL"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "ANGRY": "1",
+ "DISGUST": "2",
+ "FEAR": "3",
+ "HAPPY": "4",
+ "NEUTRAL": "5",
+ "SAD": "0"
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.0,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.05,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "no_mask_channel_overlap": false,
+ "no_mask_time_overlap": false,
+ "num_adapter_layers": 3,
+ "num_attention_heads": 12,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 12,
+ "num_negatives": 100,
+ "output_hidden_size": 768,
+ "pad_token_id": 0,
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32,
+ "xvector_output_dim": 512
+}
diff --git a/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.feature b/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.feature
new file mode 100644
index 0000000000000000000000000000000000000000..8b0fc1ee98d994e693f7f5affb8e3d387b66b793
--- /dev/null
+++ b/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": false,
+ "sampling_rate": 16000
+}
diff --git a/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.onnx b/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..92750f0da7932699d3b23f9bb5f4e8b32898b9a8
--- /dev/null
+++ b/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b11485a7a5bf84f0cf74b116f1d94bccbc2181a30dd7f86c8ce44d2f38d612f
+size 378583063
diff --git a/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.yaml b/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8dd1e9df334d972a655aa445cb0f3851c316df59
--- /dev/null
+++ b/wav2vec2-base-Speech_Emotion_Recognition_onnx/wav2vec2-base-Speech_Emotion_Recognition.yaml
@@ -0,0 +1,14 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: DunnBC22/wav2vec2-base-Speech_Emotion_Recognition
+labels:
+ 0: SAD
+ 1: ANGRY
+ 2: DISGUST
+ 3: FEAR
+ 4: HAPPY
+ 5: NEUTRAL
+model: Wav2Vec2ForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/wav2vec2-base-finetuned-emodb_onnx/.gitattributes b/wav2vec2-base-finetuned-emodb_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/wav2vec2-base-finetuned-emodb_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/wav2vec2-base-finetuned-emodb_onnx/source.txt b/wav2vec2-base-finetuned-emodb_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d895ffa19fc36e9516024f407c62cf407014f526
--- /dev/null
+++ b/wav2vec2-base-finetuned-emodb_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/wav2vec2-base-finetuned-emodb_onnx
\ No newline at end of file
diff --git a/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.config b/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.config
new file mode 100644
index 0000000000000000000000000000000000000000..8bac77b7fda09e241d2e5ead9dd10fd0608b07cb
--- /dev/null
+++ b/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.config
@@ -0,0 +1,132 @@
+Wav2Vec2Config {
+ "_name_or_path": "Hamzaaa/wav2vec2-base-finetuned-emodb",
+ "activation_dropout": 0.0,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": false,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": false,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.1,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "freeze_feat_extract_train": true,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "anger",
+ "1": "disgust",
+ "2": "fear",
+ "3": "happiness",
+ "4": "sadness"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "anger": 0,
+ "disgust": 1,
+ "fear": 2,
+ "happiness": 3,
+ "sadness": 4
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.0,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.05,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "no_mask_channel_overlap": false,
+ "no_mask_time_overlap": false,
+ "num_adapter_layers": 3,
+ "num_attention_heads": 12,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 12,
+ "num_negatives": 100,
+ "output_hidden_size": 768,
+ "pad_token_id": 0,
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32,
+ "xvector_output_dim": 512
+}
diff --git a/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.feature b/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.feature
new file mode 100644
index 0000000000000000000000000000000000000000..8b0fc1ee98d994e693f7f5affb8e3d387b66b793
--- /dev/null
+++ b/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": false,
+ "sampling_rate": 16000
+}
diff --git a/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.onnx b/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..8c3b9dc1332b447053b9329f75cb3580943d6fdc
--- /dev/null
+++ b/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4dc008450e379642afc3197dbc7c2a19fb614e43e9f45527bed2a3e99d782d18
+size 378582035
diff --git a/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.yaml b/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..edea4ecba9c476c7735b5130b02a3b2637c0d243
--- /dev/null
+++ b/wav2vec2-base-finetuned-emodb_onnx/wav2vec2-base-finetuned-emodb.yaml
@@ -0,0 +1,13 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: Hamzaaa/wav2vec2-base-finetuned-emodb
+labels:
+ 0: anger
+ 1: disgust
+ 2: fear
+ 3: happiness
+ 4: sadness
+model: Wav2Vec2ForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/.gitattributes b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/source.txt b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5b168a7253806dbea2309b91a3b90a9d6fccde91
--- /dev/null
+++ b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx
\ No newline at end of file
diff --git a/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.config b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.config
new file mode 100644
index 0000000000000000000000000000000000000000..353019f0bd57149fd1c455b122e28cd1c4508579
--- /dev/null
+++ b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.config
@@ -0,0 +1,134 @@
+Wav2Vec2Config {
+ "_name_or_path": "DrishtiSharma/wav2vec2-base-finetuned-sentiment-mesd-v9",
+ "activation_dropout": 0.0,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": false,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": false,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.1,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "freeze_feat_extract_train": true,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "Anger",
+ "1": "Disgust",
+ "2": "Fear",
+ "3": "Happiness",
+ "4": "Neutral",
+ "5": "Sadness"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "Anger": "0",
+ "Disgust": "1",
+ "Fear": "2",
+ "Happiness": "3",
+ "Neutral": "4",
+ "Sadness": "5"
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.0,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.05,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "no_mask_channel_overlap": false,
+ "no_mask_time_overlap": false,
+ "num_adapter_layers": 3,
+ "num_attention_heads": 12,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 12,
+ "num_negatives": 100,
+ "output_hidden_size": 768,
+ "pad_token_id": 0,
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32,
+ "xvector_output_dim": 512
+}
diff --git a/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.feature b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.feature
new file mode 100644
index 0000000000000000000000000000000000000000..8b0fc1ee98d994e693f7f5affb8e3d387b66b793
--- /dev/null
+++ b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": false,
+ "sampling_rate": 16000
+}
diff --git a/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.onnx b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..75ca3e946d0b242b613c0e6a132697b7409f9d6e
--- /dev/null
+++ b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15aadf5ccc0418390fdee951caad43430e2adf6256b7f57b2c5eb0e515421efd
+size 378583063
diff --git a/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.yaml b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2e2d32e4f2e127fe95bf60eba6303ddb2d9b1971
--- /dev/null
+++ b/wav2vec2-base-finetuned-sentiment-mesd-v9_onnx/wav2vec2-base-finetuned-sentiment-mesd-v9.yaml
@@ -0,0 +1,14 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: DrishtiSharma/wav2vec2-base-finetuned-sentiment-mesd-v9
+labels:
+ 0: Anger
+ 1: Disgust
+ 2: Fear
+ 3: Happiness
+ 4: Neutral
+ 5: Sadness
+model: Wav2Vec2ForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/wav2vec2-large-emotion-detection-german_onnx/.gitattributes b/wav2vec2-large-emotion-detection-german_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/wav2vec2-large-emotion-detection-german_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/wav2vec2-large-emotion-detection-german_onnx/source.txt b/wav2vec2-large-emotion-detection-german_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cc8dbb4742a35ee53655097cb9e4066dfcaefed6
--- /dev/null
+++ b/wav2vec2-large-emotion-detection-german_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/wav2vec2-large-emotion-detection-german_onnx
\ No newline at end of file
diff --git a/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.config b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.config
new file mode 100644
index 0000000000000000000000000000000000000000..345ba67febfa8881297f4a700ce57f7729d57757
--- /dev/null
+++ b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.config
@@ -0,0 +1,131 @@
+Wav2Vec2Config {
+ "_name_or_path": "padmalcom/wav2vec2-large-emotion-detection-german",
+ "activation_dropout": 0.1,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSpeechClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.1,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.1,
+ "finetuning_task": "wav2vec2_clf",
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "anger",
+ "1": "boredom",
+ "2": "disgust",
+ "3": "fear",
+ "4": "happiness",
+ "5": "sadness",
+ "6": "neutral"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "anger": 0,
+ "boredom": 1,
+ "disgust": 2,
+ "fear": 3,
+ "happiness": 4,
+ "neutral": 6,
+ "sadness": 5
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.1,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "model_type": "wav2vec2",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 24,
+ "num_negatives": 100,
+ "output_hidden_size": 1024,
+ "pad_token_id": 0,
+ "pooling_mode": "mean",
+ "problem_type": "single_label_classification",
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32,
+ "xvector_output_dim": 512
+}
diff --git a/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.feature b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.feature
new file mode 100644
index 0000000000000000000000000000000000000000..0be68ab30a4dc0a635948b99096fa6887785ddb2
--- /dev/null
+++ b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
diff --git a/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.onnx b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..735793c84477d43c76391b5a3e3672e5cd4a8466
--- /dev/null
+++ b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ccb3dce44226c65ae9a961595944a833fb3b027d6cfe5f2865f00e77faeda4d
+size 1263408034
diff --git a/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.processor b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.processor
new file mode 100644
index 0000000000000000000000000000000000000000..04efc329378be8b4d1bf5cec8c6119dbd4cbc8cf
--- /dev/null
+++ b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.processor
@@ -0,0 +1,21 @@
+Wav2Vec2Processor:
+- feature_extractor: Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
+
+- tokenizer: Wav2Vec2CTCTokenizer(name_or_path='padmalcom/wav2vec2-large-emotion-detection-german', vocab_size=32, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>'}, clean_up_tokenization_spaces=True), added_tokens_decoder={
+	0: AddedToken("<pad>", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+	1: AddedToken("<s>", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+	2: AddedToken("</s>", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+	3: AddedToken("<unk>", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+}
+
+{
+ "processor_class": "Wav2Vec2Processor"
+}
diff --git a/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.tokens b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.tokens
new file mode 100644
index 0000000000000000000000000000000000000000..f13d48d8183c87f226e5a1ffed4f27fd7001742a
--- /dev/null
+++ b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.tokens
@@ -0,0 +1,32 @@
+0: <pad>
+1: <s>
+2: </s>
+3: <unk>
+4: '|'
+5: E
+6: T
+7: A
+8: O
+9: N
+10: I
+11: H
+12: S
+13: R
+14: D
+15: L
+16: U
+17: M
+18: W
+19: C
+20: F
+21: G
+22: Y
+23: P
+24: B
+25: V
+26: K
+27: ''''
+28: X
+29: J
+30: Q
+31: Z
diff --git a/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.yaml b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..734404266d19ad51c1f3f6fc832baac430ffc1aa
--- /dev/null
+++ b/wav2vec2-large-emotion-detection-german_onnx/wav2vec2-large-emotion-detection-german.yaml
@@ -0,0 +1,15 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: padmalcom/wav2vec2-large-emotion-detection-german
+labels:
+ 0: anger
+ 1: boredom
+ 2: disgust
+ 3: fear
+ 4: happiness
+ 5: sadness
+ 6: neutral
+model: Wav2Vec2ForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/.gitattributes b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/source.txt b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d88f344bff07190ff6c2ec6b1d302107a12d0c9e
--- /dev/null
+++ b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx
\ No newline at end of file
diff --git a/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.config b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.config
new file mode 100644
index 0000000000000000000000000000000000000000..a1533cafa7a6c24303a582dd40a720bffdd43eff
--- /dev/null
+++ b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.config
@@ -0,0 +1,123 @@
+Wav2Vec2Config {
+ "_name_or_path": "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim",
+ "activation_dropout": 0.1,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSpeechClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 768,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.1,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.1,
+ "finetuning_task": "wav2vec2_reg",
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "arousal",
+ "1": "dominance",
+ "2": "valence"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "arousal": 0,
+ "dominance": 1,
+ "valence": 2
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.1,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "model_type": "wav2vec2",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 12,
+ "num_negatives": 100,
+ "output_hidden_size": 1024,
+ "pad_token_id": 0,
+ "pooling_mode": "mean",
+ "problem_type": "regression",
+ "proj_codevector_dim": 768,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": null,
+ "xvector_output_dim": 512
+}
diff --git a/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.feature b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.feature
new file mode 100644
index 0000000000000000000000000000000000000000..0be68ab30a4dc0a635948b99096fa6887785ddb2
--- /dev/null
+++ b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
diff --git a/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.onnx b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1b4af890dec5856d54c74d66ba8203a041ade468
--- /dev/null
+++ b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba86ebca4174bf036182063e09495f7d523b8924a1a9c621f3daa1a0ec0b3595
+size 661664947
diff --git a/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.processor b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.processor
new file mode 100644
index 0000000000000000000000000000000000000000..15bd74397c18b8188cb90fc62d699fdd4553745c
--- /dev/null
+++ b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.processor
@@ -0,0 +1,21 @@
+Wav2Vec2Processor:
+- feature_extractor: Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
+
+- tokenizer: Wav2Vec2CTCTokenizer(name_or_path='audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim', vocab_size=0, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': '', 'pad_token': ''}, clean_up_tokenization_spaces=True), added_tokens_decoder={
+ 0: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
+ 1: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
+ 2: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
+ 3: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
+}
+
+{
+ "processor_class": "Wav2Vec2Processor"
+}
diff --git a/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.tokens b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.tokens
new file mode 100644
index 0000000000000000000000000000000000000000..3ed10f9b14a51549d8c544423f664ee686f5a48c
--- /dev/null
+++ b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.tokens
@@ -0,0 +1,4 @@
+0:
+1:
+2:
+3:
diff --git a/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.yaml b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ad57123ba212e30040e870fc86b1e710078dbfd8
--- /dev/null
+++ b/wav2vec2-large-robust-12-ft-emotion-msp-dim_onnx/wav2vec2-large-robust-12-ft-emotion-msp-dim.yaml
@@ -0,0 +1,11 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim
+labels:
+ 0: arousal
+ 1: dominance
+ 2: valence
+model: EmotionModel
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/.gitattributes b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..24e3ac88994e5a782a1f7286ca2dcffd66874da5
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+onnx/*.model filter=lfs diff=lfs merge=lfs -text
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/README.md b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7be5fc7f47d5db027d120b8024982df93db95b74
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/README.md
@@ -0,0 +1,3 @@
+---
+license: mit
+---
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/config.json b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..605cf8fe6833c77e7bd454a9a64f9952e6b5a238
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/config.json
@@ -0,0 +1,107 @@
+{
+ "_name_or_path": "jonatasgrosman/wav2vec2-large-xlsr-53-english",
+ "activation_dropout": 0.05,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "mean",
+ "ctc_zero_infinity": true,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.05,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "finetuning_task": "wav2vec2_clf",
+ "gradient_checkpointing": true,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.05,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "angry",
+ "1": "calm",
+ "2": "disgust",
+ "3": "fearful",
+ "4": "happy",
+ "5": "neutral",
+ "6": "sad",
+ "7": "surprised"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "angry": 0,
+ "calm": 1,
+ "disgust": 2,
+ "fearful": 3,
+ "happy": 4,
+ "neutral": 5,
+ "sad": 6,
+ "surprised": 7
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.05,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.05,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 24,
+ "num_negatives": 100,
+ "pad_token_id": 0,
+ "pooling_mode": "mean",
+ "problem_type": "single_label_classification",
+ "proj_codevector_dim": 256,
+ "transformers_version": "4.8.2",
+ "vocab_size": 33
+}
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/onnx/model_quantized.onnx b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/onnx/model_quantized.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..e82cfe4ee5580c050e9b98d62dc777b6af8125cc
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/onnx/model_quantized.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e6c875f2bb1fe16d29005d6f57dda37e64e27401724353ff0d0c6b1f22d0a23
+size 1263409099
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/preprocessor_config.json b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..73caa151574001d3d495fae897e1d38968249712
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/preprocessor_config.json
@@ -0,0 +1,9 @@
+{
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/source.txt b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9bf1b11f403d52e83909735f4a16716ef95fb8da
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/Aroganta/wav2vec2-lg-xlsr-en-speech-emotion-recognition-onnx
\ No newline at end of file
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/gitattributes b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/source.txt b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6a38b05aa97b73d147e3dd99bd242c16d4e27d2e
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx
\ No newline at end of file
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.config b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.config
new file mode 100644
index 0000000000000000000000000000000000000000..82f5d474a3329a735cb3061205f7a835517bfc95
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.config
@@ -0,0 +1,139 @@
+Wav2Vec2Config {
+ "_name_or_path": "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
+ "activation_dropout": 0.05,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "mean",
+ "ctc_zero_infinity": true,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.05,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "finetuning_task": "wav2vec2_clf",
+ "gradient_checkpointing": true,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.05,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "angry",
+ "1": "calm",
+ "2": "disgust",
+ "3": "fearful",
+ "4": "happy",
+ "5": "neutral",
+ "6": "sad",
+ "7": "surprised"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "angry": 0,
+ "calm": 1,
+ "disgust": 2,
+ "fearful": 3,
+ "happy": 4,
+ "neutral": 5,
+ "sad": 6,
+ "surprised": 7
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.05,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.05,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 24,
+ "num_negatives": 100,
+ "output_hidden_size": 1024,
+ "pad_token_id": 0,
+ "pooling_mode": "mean",
+ "problem_type": "single_label_classification",
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 33,
+ "xvector_output_dim": 512
+}
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.feature b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.feature
new file mode 100644
index 0000000000000000000000000000000000000000..0be68ab30a4dc0a635948b99096fa6887785ddb2
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.onnx b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..b545473307b64d9b6afa2491f5fbcfc644f39045
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b60ff8a70d81410ae56755140ba8b0456b6eb59f517da9ec69ce6d78103416ff
+size 1263409062
diff --git a/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.yaml b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b56158a32b06ea7bbc1da476de135c869a8695f0
--- /dev/null
+++ b/wav2vec2-lg-xlsr-en-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-en-speech-emotion-recognition.yaml
@@ -0,0 +1,16 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition
+labels:
+ 0: angry
+ 1: calm
+ 2: disgust
+ 3: fearful
+ 4: happy
+ 5: neutral
+ 6: sad
+ 7: surprised
+model: Wav2Vec2ForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/.gitattributes b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/source.txt b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2ab9327681492a7a3f29e4bcc4c2c53068beb12d
--- /dev/null
+++ b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx
\ No newline at end of file
diff --git a/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.config b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.config
new file mode 100644
index 0000000000000000000000000000000000000000..6279adac6a5e76c02dcbbd6592e5b91ac6a39a12
--- /dev/null
+++ b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.config
@@ -0,0 +1,130 @@
+Wav2Vec2Config {
+ "_name_or_path": "Lajavaness/wav2vec2-lg-xlsr-fr-speech-emotion-recognition",
+ "activation_dropout": 0.05,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "mean",
+ "ctc_zero_infinity": true,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.05,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.05,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "Pleased",
+ "1": "Relaxed",
+ "2": "Neutral",
+ "3": "Sad",
+ "4": "Tension"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "E1": 0,
+ "E2": 1,
+ "E3": 2,
+ "E4": 3,
+ "E6": 4
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.05,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.05,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 24,
+ "num_negatives": 100,
+ "output_hidden_size": 1024,
+ "pad_token_id": 0,
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 59,
+ "xvector_output_dim": 512
+}
diff --git a/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.feature b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.feature
new file mode 100644
index 0000000000000000000000000000000000000000..6399a8dec4c059345467cc4f232bf1c49004dd11
--- /dev/null
+++ b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.feature
@@ -0,0 +1,10 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "processor_class": "Wav2Vec2Processor",
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
diff --git a/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.onnx b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..483f9b8ac7634767e65f289365eb09d36b86aac7
--- /dev/null
+++ b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a0187561d6b29afde6f80a4cad3852705104ab41aa00a6f358be38ccd6ba560
+size 1263405978
diff --git a/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.processor b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.processor
new file mode 100644
index 0000000000000000000000000000000000000000..b5f981d023bc3d5842ea9ba066dbb5521437ae1b
--- /dev/null
+++ b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.processor
@@ -0,0 +1,22 @@
+Wav2Vec2Processor:
+- feature_extractor: Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "processor_class": "Wav2Vec2Processor",
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
+
+- tokenizer: Wav2Vec2CTCTokenizer(name_or_path='Lajavaness/wav2vec2-lg-xlsr-fr-speech-emotion-recognition', vocab_size=59, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': '', 'pad_token': ''}, clean_up_tokenization_spaces=True), added_tokens_decoder={
+ 0: AddedToken("", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+ 1: AddedToken("", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+ 2: AddedToken("", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+ 3: AddedToken("", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
+}
+
+{
+ "processor_class": "Wav2Vec2Processor"
+}
diff --git a/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.tokens b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.tokens
new file mode 100644
index 0000000000000000000000000000000000000000..c35e017fa7827990b3da676bc1bd211911b8a071
--- /dev/null
+++ b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.tokens
@@ -0,0 +1,59 @@
+0: <pad>
+1: <s>
+2: </s>
+3: <unk>
+4: '|'
+5: ''''
+6: '-'
+7: a
+8: b
+9: c
+10: d
+11: e
+12: f
+13: g
+14: h
+15: i
+16: j
+17: k
+18: l
+19: m
+20: n
+21: o
+22: p
+23: q
+24: r
+25: s
+26: t
+27: u
+28: v
+29: w
+30: x
+31: y
+32: z
+33: à
+34: á
+35: â
+36: ä
+37: ç
+38: è
+39: é
+40: ê
+41: ë
+42: í
+43: î
+44: ï
+45: ñ
+46: ó
+47: ô
+48: ö
+49: ù
+50: ú
+51: û
+52: ü
+53: ć
+54: č
+55: ō
+56: œ
+57: š
+58: ș
diff --git a/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.yaml b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d311b48efe4624563f5c520349bc8290429be16d
--- /dev/null
+++ b/wav2vec2-lg-xlsr-fr-speech-emotion-recognition_onnx/wav2vec2-lg-xlsr-fr-speech-emotion-recognition.yaml
@@ -0,0 +1,13 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: Lajavaness/wav2vec2-lg-xlsr-fr-speech-emotion-recognition
+labels:
+ 0: Pleased
+ 1: Relaxed
+ 2: Neutral
+ 3: Sad
+ 4: Tension
+model: Wav2Vec2ForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/.gitattributes b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/source.txt b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c4dbd414d9537d7441772781a88d7e5c996bf016
--- /dev/null
+++ b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx
\ No newline at end of file
diff --git a/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.config b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.config
new file mode 100644
index 0000000000000000000000000000000000000000..2e72c8dbf9f8db04dbe1aabdedd4ce8d1f2637a2
--- /dev/null
+++ b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.config
@@ -0,0 +1,130 @@
+Wav2Vec2Config {
+ "_name_or_path": "canlinzhang/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP",
+ "activation_dropout": 0.0,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": false,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": false,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.1,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "freeze_feat_extract_train": true,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "neu",
+ "1": "ang",
+ "2": "sad",
+ "3": "hap"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "ang": 1,
+ "hap": 3,
+ "neu": 0,
+ "sad": 2
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.0,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.05,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "no_mask_channel_overlap": false,
+ "no_mask_time_overlap": false,
+ "num_adapter_layers": 3,
+ "num_attention_heads": 12,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 12,
+ "num_negatives": 100,
+ "output_hidden_size": 768,
+ "pad_token_id": 0,
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32,
+ "xvector_output_dim": 512
+}
diff --git a/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.feature b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.feature
new file mode 100644
index 0000000000000000000000000000000000000000..8b0fc1ee98d994e693f7f5affb8e3d387b66b793
--- /dev/null
+++ b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": false,
+ "sampling_rate": 16000
+}
diff --git a/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.onnx b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..797d135851569c37b8d95c11f4c823acf2d42276
--- /dev/null
+++ b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f972a615732d3b72e6472b4ba9307b8799c700e03c9b178306e47f3ee496845f
+size 378581007
diff --git a/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.yaml b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab0c91027c5fb05cd7563e6544b3d0ddbb31cba0
--- /dev/null
+++ b/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP_onnx/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP.yaml
@@ -0,0 +1,12 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: canlinzhang/wav2vec2_speech_emotion_recognition_trained_on_IEMOCAP
+labels:
+ 0: neu
+ 1: ang
+ 2: sad
+ 3: hap
+model: Wav2Vec2ForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/.gitattributes b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/source.txt b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94f4ae1612141f211312f7e08271da8203b12ab1
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/source.txt
@@ -0,0 +1,2 @@
+
+https://huggingface.co/steveway/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx
\ No newline at end of file
diff --git a/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.config b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.config
new file mode 100644
index 0000000000000000000000000000000000000000..5d166e5f1b76fac4c5cf69db969ccd8a14f0a020
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.config
@@ -0,0 +1,133 @@
+Wav2Vec2Config {
+ "_name_or_path": "Hamzaaa/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee",
+ "activation_dropout": 0.0,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "mean",
+ "ctc_zero_infinity": true,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.0,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "finetuning_task": "wav2vec2_clf",
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "anger",
+ "1": "disgust",
+ "2": "fear",
+ "3": "happiness",
+ "4": "sadness"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "anger": 0,
+ "disgust": 1,
+ "fear": 2,
+ "happiness": 3,
+ "sadness": 4
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.1,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.05,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 24,
+ "num_negatives": 100,
+ "output_hidden_size": 1024,
+ "pad_token_id": 54,
+ "pooling_mode": "mean",
+ "problem_type": "single_label_classification",
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 55,
+ "xvector_output_dim": 512
+}
diff --git a/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.feature b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.feature
new file mode 100644
index 0000000000000000000000000000000000000000..8b0fc1ee98d994e693f7f5affb8e3d387b66b793
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": false,
+ "sampling_rate": 16000
+}
diff --git a/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.onnx b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..73122dc451c7e5e479a6e6e09f0ccb5dba42791c
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5e94acc528d4a6828ecda9e9cbb6597f8ef9e5a383653f3082243044763c249
+size 1263405978
diff --git a/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.yaml b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eceabbb57e83e6dd33b00486ef9028f371262e67
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee_onnx/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee.yaml
@@ -0,0 +1,13 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: Hamzaaa/xlsr-wav2vec-speech-emotion-recognition-finetuned-Savee
+labels:
+ 0: anger
+ 1: disgust
+ 2: fear
+ 3: happiness
+ 4: sadness
+model: Wav2Vec2ForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000
diff --git a/xlsr-wav2vec-speech-emotion-recognition_onnx/.gitattributes b/xlsr-wav2vec-speech-emotion-recognition_onnx/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition_onnx/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/xlsr-wav2vec-speech-emotion-recognition_onnx/source.txt b/xlsr-wav2vec-speech-emotion-recognition_onnx/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..88f87701f71e0443807f67f2c421650de97fea87
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition_onnx/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/steveway/xlsr-wav2vec-speech-emotion-recognition_onnx
\ No newline at end of file
diff --git a/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.config b/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.config
new file mode 100644
index 0000000000000000000000000000000000000000..bbc1985b8344da840e4fdd3f8147734119a0e879
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.config
@@ -0,0 +1,134 @@
+Wav2Vec2Config {
+ "_name_or_path": "harshit345/xlsr-wav2vec-speech-emotion-recognition",
+ "activation_dropout": 0.0,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForSpeechClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "mean",
+ "ctc_zero_infinity": true,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.0,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "finetuning_task": "wav2vec2_clf",
+ "gradient_checkpointing": true,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "anger",
+ "1": "disgust",
+ "2": "fear",
+ "3": "happiness",
+ "4": "sadness"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "anger": 0,
+ "disgust": 1,
+ "fear": 2,
+ "happiness": 3,
+ "sadness": 4
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.1,
+ "mask_channel_length": 10,
+ "mask_channel_min_space": 1,
+ "mask_channel_other": 0.0,
+ "mask_channel_prob": 0.0,
+ "mask_channel_selection": "static",
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_min_space": 1,
+ "mask_time_other": 0.0,
+ "mask_time_prob": 0.05,
+ "mask_time_selection": "static",
+ "model_type": "wav2vec2",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 24,
+ "num_negatives": 100,
+ "output_hidden_size": 1024,
+ "pad_token_id": 54,
+ "pooling_mode": "mean",
+ "problem_type": "single_label_classification",
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 55,
+ "xvector_output_dim": 512
+}
diff --git a/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.feature b/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.feature
new file mode 100644
index 0000000000000000000000000000000000000000..0be68ab30a4dc0a635948b99096fa6887785ddb2
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.feature
@@ -0,0 +1,9 @@
+Wav2Vec2FeatureExtractor {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+}
diff --git a/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.onnx b/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..856514b600e9c028b6d2ad2af491270ff675ac0b
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e86f0b7ea1da2ba9af9c00177449b57f5cba8a3d9c9d79f5348df86a05c1d4d
+size 1263405978
diff --git a/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.yaml b/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dec898489593084ad39e95723f542a97a899865f
--- /dev/null
+++ b/xlsr-wav2vec-speech-emotion-recognition_onnx/xlsr-wav2vec-speech-emotion-recognition.yaml
@@ -0,0 +1,13 @@
+feature_extractor: Wav2Vec2FeatureExtractor
+full_name: harshit345/xlsr-wav2vec-speech-emotion-recognition
+labels:
+ 0: anger
+ 1: disgust
+ 2: fear
+ 3: happiness
+ 4: sadness
+model: Wav2Vec2ForSequenceClassification
+sampling_rate: 16000
+shape:
+- 1
+- 320000