xocialize committed on
Commit
f2f5dd2
·
verified ·
1 Parent(s): 85936d3

Add emotion2vec+ large MLX weights (fp16) + config + model card

Browse files
README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ license_name: funasr-model-license
4
+ license_link: https://huggingface.co/emotion2vec/emotion2vec_plus_large/blob/main/LICENSE
5
+ library_name: mlx
6
+ base_model: emotion2vec/emotion2vec_plus_large
7
+ pipeline_tag: audio-classification
8
+ tags:
9
+ - mlx
10
+ - audio
11
+ - audio-classification
12
+ - speech-emotion-recognition
13
+ - emotion-recognition
14
+ - emotion2vec
15
+ - data2vec
16
+ - apple-silicon
17
+ ---
18
+
19
+ # mlx-community/emotion2vec-plus-large-mlx
20
+
21
+ The **emotion2vec+ large** speech-emotion-recognition model converted to MLX format for native
22
+ inference on Apple Silicon, consumed by the [`xocialize/emotion2vec-mlx-swift`](https://github.com/xocialize/emotion2vec-mlx-swift)
23
+ Swift port. Refer to the [original model card](https://huggingface.co/emotion2vec/emotion2vec_plus_large)
24
+ for details.
25
+
26
+ ## Model
27
+
28
+ - **Family:** emotion2vec / emotion2vec+ (Ma et al., "emotion2vec: Self-Supervised Pre-Training for Speech Emotion Representation," [arXiv:2312.15185](https://arxiv.org/abs/2312.15185))
29
+ - **Architecture:** Data2Vec 2.0 — conv feature extractor → transformer encoder → 9-class linear head
30
+ - **Output:** 9-class categorical emotion (`angry`, `disgusted`, `fearful`, `happy`, `neutral`, `other`, `sad`, `surprised`, `unknown`)
31
+ - **Sample rate:** 16000 Hz, mono
32
+ - **Precision:** fp16 (233 tensors)
33
+
34
+ ## Files
35
+
36
+ - `emotion2vec_large.safetensors` — the MLX weights (fp16).
37
+ - `emotion2vec_large_config.json` — model config consumed by the loader.
38
+
39
+ ## Usage (Swift / MLX)
40
+
41
+ ```swift
42
+ import Emotion2VecMLX
43
+ import Hub
44
+
45
+ let dir = try await HubApi().snapshot(from: "mlx-community/emotion2vec-plus-large-mlx")
46
+ let recogniser = try await EmotionRecogniser(weightsDirectory: dir,
47
+ config: EmotionRecogniserConfig(models: .categorical))
48
+ let result = try await recogniser.classify(audioURL: speechURL)
49
+ print(result.categorical.label, result.categorical.confidence)
50
+ ```
51
+
52
+ ## Source
53
+
54
+ - **Original model:** https://huggingface.co/emotion2vec/emotion2vec_plus_large
55
+ - **Swift consumer:** https://github.com/xocialize/emotion2vec-mlx-swift
56
+
57
+ ## License
58
+
59
+ Distributed under FunASR's custom MODEL_LICENSE, which permits use, copying, modification, and redistribution
60
+ with attribution and model-name retention (it includes a no-denigration clause and provides no warranty). The license is not SPDX-listed but is
61
+ permissive. See the [original license](https://huggingface.co/emotion2vec/emotion2vec_plus_large/blob/main/LICENSE).
emotion2vec_large.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecfbf0d668bc86d963332bb72871744184c24c84a9ac7c5d9b113d0bf55fbb94
3
+ size 324305186
emotion2vec_large_config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "emotion2vec_plus_large",
3
+ "architecture": "data2vec2",
4
+ "num_classes": 9,
5
+ "hidden_dim": 1024,
6
+ "ffn_dim": 4096,
7
+ "num_layers": 12,
8
+ "num_context_blocks": 4,
9
+ "num_shared_blocks": 8,
10
+ "num_heads": 16,
11
+ "conv_feature_layers": [
12
+ [
13
+ 1,
14
+ 512,
15
+ 10,
16
+ 5
17
+ ],
18
+ [
19
+ 512,
20
+ 512,
21
+ 3,
22
+ 2
23
+ ],
24
+ [
25
+ 512,
26
+ 512,
27
+ 3,
28
+ 2
29
+ ],
30
+ [
31
+ 512,
32
+ 512,
33
+ 3,
34
+ 2
35
+ ],
36
+ [
37
+ 512,
38
+ 512,
39
+ 3,
40
+ 2
41
+ ],
42
+ [
43
+ 512,
44
+ 512,
45
+ 2,
46
+ 2
47
+ ],
48
+ [
49
+ 512,
50
+ 512,
51
+ 2,
52
+ 2
53
+ ]
54
+ ],
55
+ "pos_conv_depth": 5,
56
+ "pos_conv_kernel": 19,
57
+ "pos_conv_groups": 16,
58
+ "feature_dim": 512,
59
+ "has_context_norm": true,
60
+ "has_extra_tokens": true,
61
+ "has_alibi_scale": true,
62
+ "num_extra_tokens": 10,
63
+ "layer_norm_first": false,
64
+ "dtype": "float16",
65
+ "emotion_labels": [
66
+ "angry",
67
+ "disgusted",
68
+ "fearful",
69
+ "happy",
70
+ "neutral",
71
+ "other",
72
+ "sad",
73
+ "surprised",
74
+ "unknown"
75
+ ]
76
+ }