awni committed on
Commit
02eb0ca
·
verified ·
1 Parent(s): 1bbb179

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tekken.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: mlx
3
+ language:
4
+ - en
5
+ - fr
6
+ - es
7
+ - de
8
+ - ru
9
+ - zh
10
+ - ja
11
+ - it
12
+ - pt
13
+ - nl
14
+ - ar
15
+ - hi
16
+ - ko
17
+ license: apache-2.0
18
+ inference: false
19
+ base_model: mistralai/Voxtral-Mini-4B-Realtime-2602
20
+ extra_gated_description: If you want to learn more about how we process your personal
21
+ data, please read our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
22
+ tags:
23
+ - mistral-common
24
+ - mlx
25
+ pipeline_tag: automatic-speech-recognition
26
+ ---
27
+ # mlx-community/Voxtral-Mini-4B-Realtime-6bit
28
+
29
+ This model [mlx-community/Voxtral-Mini-4B-Realtime-6bit](https://huggingface.co/mlx-community/Voxtral-Mini-4B-Realtime-6bit) was converted to MLX format and quantized to 6 bits (group size 64) from [mistralai/Voxtral-Mini-4B-Realtime-2602](https://huggingface.co/mistralai/Voxtral-Mini-4B-Realtime-2602) using [voxmlx](https://github.com/awnihannun/voxmlx).
30
+
31
+ ## Use with voxmlx
32
+
33
+ ```bash
34
+ pip install voxmlx
35
+ ```
36
+
37
+ ```python
38
+ from voxmlx import transcribe
39
+
40
+ text = transcribe("audio.flac", model_path="mlx-community/Voxtral-Mini-4B-Realtime-6bit")
41
+ print(text)
42
+ ```
config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ada_rms_norm_t_cond": true,
3
+ "ada_rms_norm_t_cond_dim": 32,
4
+ "causal": true,
5
+ "dim": 3072,
6
+ "head_dim": 128,
7
+ "hidden_dim": 9216,
8
+ "model_max_length": 131072,
9
+ "model_parallel": 1,
10
+ "multimodal": {
11
+ "whisper_model_args": {
12
+ "encoder_args": {
13
+ "audio_encoding_args": {
14
+ "sampling_rate": 16000,
15
+ "frame_rate": 12.5,
16
+ "num_mel_bins": 128,
17
+ "hop_length": 160,
18
+ "window_size": 400,
19
+ "chunk_length_s": null,
20
+ "global_log_mel_max": 1.5,
21
+ "transcription_format": "streaming"
22
+ },
23
+ "dim": 1280,
24
+ "n_layers": 32,
25
+ "head_dim": 64,
26
+ "hidden_dim": 5120,
27
+ "n_heads": 32,
28
+ "vocab_size": 131072,
29
+ "n_kv_heads": 32,
30
+ "use_biases": true,
31
+ "use_cache": false,
32
+ "rope_theta": 1000000.0,
33
+ "causal": true,
34
+ "norm_eps": 1e-05,
35
+ "pos_embed": "rope",
36
+ "max_source_positions": null,
37
+ "ffn_type": "swiglu",
38
+ "norm_type": "rms_norm",
39
+ "sliding_window": 750
40
+ },
41
+ "downsample_args": {
42
+ "downsample_factor": 4
43
+ }
44
+ }
45
+ },
46
+ "n_heads": 32,
47
+ "n_kv_heads": 8,
48
+ "n_layers": 26,
49
+ "norm_eps": 1e-05,
50
+ "quantization": {
51
+ "group_size": 64,
52
+ "bits": 6
53
+ },
54
+ "rope_theta": 1000000.0,
55
+ "sliding_window": 8192,
56
+ "tied_embeddings": true,
57
+ "use_biases": false,
58
+ "vocab_size": 131072
59
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2b8253dd386a4ce2ac130d937fa4e78ae3332021ad65b8ea1eb5c6c37112319
3
+ size 3609304614
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tekken.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8434af1d39eba99f0ef46cf1450bf1a63fa941a26933a1ef5dbbf4adf0d00e44
3
+ size 14910348