Voxtral (de, en, es, fr, hi, it, nl, pt)
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +32 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/.gitattributes +65 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/README.md +178 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/chat_template.jinja +28 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/config.json +81 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/generation_config.json +6 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder.onnx_data_1 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_bnb4.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_bnb4.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_fp16.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_fp16.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_int8.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_int8.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_q4.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_q4.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_q4f16.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_q4f16.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_quantized.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_quantized.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_uint8.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_uint8.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_1 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_2 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_3 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_4 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_5 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_6 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_7 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data_1 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data_2 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data_3 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4.onnx_data_1 +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4f16.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4f16.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_fp16.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_fp16.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_q4.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_q4.onnx_data +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_quantized.onnx +3 -0
- de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_quantized.onnx_data +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,35 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507_timestamped/decoder_model_attentive_q4_weight_only_inc.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507_timestamped/decoder_model_attentive.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_bnb4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_int8.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_uint8.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data_2 filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data_3 filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_2 filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_3 filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_4 filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_5 filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_6 filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_7 filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/tekken.json filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/.gitattributes
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
onnx/decoder_model_merged.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
onnx/decoder_model_merged.onnx_data_2 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
onnx/decoder_model_merged.onnx_data_3 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
onnx/decoder_model_merged.onnx_data_4 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
onnx/decoder_model_merged.onnx_data_5 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
onnx/decoder_model_merged.onnx_data_6 filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
onnx/decoder_model_merged.onnx_data_7 filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
onnx/decoder_model_merged_fp16.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
onnx/decoder_model_merged_fp16.onnx_data_2 filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
onnx/decoder_model_merged_fp16.onnx_data_3 filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
onnx/decoder_model_merged_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
onnx/decoder_model_merged_q4.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
onnx/decoder_model_merged_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
onnx/embed_tokens.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
onnx/embed_tokens_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
tekken.json filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
onnx/audio_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
onnx/audio_encoder.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
onnx/embed_tokens_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
onnx/embed_tokens_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
onnx/audio_encoder_bnb4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
onnx/audio_encoder_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
onnx/audio_encoder_int8.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
onnx/audio_encoder_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
onnx/audio_encoder_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
onnx/audio_encoder_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
onnx/audio_encoder_uint8.onnx_data filter=lfs diff=lfs merge=lfs -text
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/README.md
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
- fr
|
| 5 |
+
- de
|
| 6 |
+
- es
|
| 7 |
+
- it
|
| 8 |
+
- pt
|
| 9 |
+
- nl
|
| 10 |
+
- hi
|
| 11 |
+
license: apache-2.0
|
| 12 |
+
library_name: transformers.js
|
| 13 |
+
base_model:
|
| 14 |
+
- mistralai/Voxtral-Mini-3B-2507
|
| 15 |
+
pipeline_tag: audio-text-to-text
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
# Voxtral Mini 1.0 (3B) - 2507
|
| 19 |
+
|
| 20 |
+
Voxtral Mini is an enhancement of [Ministral 3B](https://mistral.ai/news/ministraux), incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding.
|
| 21 |
+
|
| 22 |
+
This repository contains ONNX weights for the original model, [mistralai/Voxtral-Mini-3B-2507](https://huggingface.co/mistralai/Voxtral-Mini-3B-2507).
|
| 23 |
+
|
| 24 |
+
Learn more about Voxtral in Mistral AI's blog post [here](https://mistral.ai/news/voxtral).
|
| 25 |
+
|
| 26 |
+
## Key Features
|
| 27 |
+
|
| 28 |
+
Voxtral builds upon Ministral-3B with powerful audio understanding capabilities.
|
| 29 |
+
- **Dedicated transcription mode**: Voxtral can operate in a pure speech transcription mode to maximize performance. By default, Voxtral automatically predicts the source audio language and transcribes the text accordingly
|
| 30 |
+
- **Long-form context**: With a 32k token context length, Voxtral handles audio of up to 30 minutes for transcription, or 40 minutes for understanding
|
| 31 |
+
- **Built-in Q&A and summarization**: Supports asking questions directly through audio. Analyze audio and generate structured summaries without the need for separate ASR and language models
|
| 32 |
+
- **Natively multilingual**: Automatic language detection and state-of-the-art performance in the world’s most widely used languages (English, Spanish, French, Portuguese, Hindi, German, Dutch, Italian)
|
| 33 |
+
- **Function-calling straight from voice**: Enables direct triggering of backend functions, workflows, or API calls based on spoken user intents
|
| 34 |
+
- **Highly capable at text**: Retains the text understanding capabilities of its language model backbone, Ministral-3B
|
| 35 |
+
|
| 36 |
+
## Benchmark Results
|
| 37 |
+
|
| 38 |
+
### Audio
|
| 39 |
+
|
| 40 |
+
Average word error rate (WER) over the FLEURS, Mozilla Common Voice and Multilingual LibriSpeech benchmarks:
|
| 41 |
+
|
| 42 |
+

|
| 43 |
+
|
| 44 |
+
### Text
|
| 45 |
+
|
| 46 |
+

|
| 47 |
+
|
| 48 |
+
## Usage
|
| 49 |
+
|
| 50 |
+
**Notes**:
|
| 51 |
+
|
| 52 |
+
- Use `temperature=0.2` and `top_p=0.95` for chat completion (*e.g. Audio Understanding*) and `temperature=0.0` for transcription
|
| 53 |
+
- Multiple audio clips per message and multiple user turns with audio are supported
|
| 54 |
+
- System prompts are not yet supported
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
### Transformers.js
|
| 58 |
+
|
| 59 |
+
#### Online demo
|
| 60 |
+
|
| 61 |
+
Try it out with our [online demo](https://huggingface.co/spaces/webml-community/Voxtral-WebGPU):
|
| 62 |
+
|
| 63 |
+
<video controls src="https://cdn-uploads.huggingface.co/production/uploads/61b253b7ac5ecaae3d1efe0c/3z0psEz3VS4kbscvXEE4n.mp4"></video>
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
#### Code snippets
|
| 67 |
+
|
| 68 |
+
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
|
| 69 |
+
```bash
|
| 70 |
+
npm i @huggingface/transformers
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
**Example**: Transcription
|
| 74 |
+
|
| 75 |
+
```js
|
| 76 |
+
import { VoxtralForConditionalGeneration, VoxtralProcessor, TextStreamer, read_audio } from "@huggingface/transformers";
|
| 77 |
+
|
| 78 |
+
// Load the processor and model
|
| 79 |
+
const model_id = "onnx-community/Voxtral-Mini-3B-2507-ONNX";
|
| 80 |
+
const processor = await VoxtralProcessor.from_pretrained(model_id);
|
| 81 |
+
const model = await VoxtralForConditionalGeneration.from_pretrained(
|
| 82 |
+
model_id,
|
| 83 |
+
{
|
| 84 |
+
dtype: {
|
| 85 |
+
embed_tokens: "fp16", // "fp32", "fp16", "q8", "q4"
|
| 86 |
+
audio_encoder: "q4", // "fp32", "fp16", "q8", "q4", "q4f16"
|
| 87 |
+
decoder_model_merged: "q4", // "q4", "q4f16"
|
| 88 |
+
},
|
| 89 |
+
device: "webgpu",
|
| 90 |
+
},
|
| 91 |
+
);
|
| 92 |
+
|
| 93 |
+
// Prepare the conversation
|
| 94 |
+
const conversation = [
|
| 95 |
+
{
|
| 96 |
+
"role": "user",
|
| 97 |
+
"content": [
|
| 98 |
+
{ "type": "audio" },
|
| 99 |
+
{ "type": "text", "text": "lang:en [TRANSCRIBE]" },
|
| 100 |
+
],
|
| 101 |
+
}
|
| 102 |
+
];
|
| 103 |
+
const text = processor.apply_chat_template(conversation, { tokenize: false });
|
| 104 |
+
const audio = await read_audio("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/mlk.wav", 16000);
|
| 105 |
+
const inputs = await processor(text, audio);
|
| 106 |
+
|
| 107 |
+
// Generate the response
|
| 108 |
+
const generated_ids = await model.generate({
|
| 109 |
+
...inputs,
|
| 110 |
+
max_new_tokens: 256,
|
| 111 |
+
streamer: new TextStreamer(processor.tokenizer, { skip_special_tokens: true, skip_prompt: true }),
|
| 112 |
+
});
|
| 113 |
+
|
| 114 |
+
// Decode the generated tokens
|
| 115 |
+
const new_tokens = generated_ids.slice(null, [inputs.input_ids.dims.at(-1), null]);
|
| 116 |
+
const generated_texts = processor.batch_decode(
|
| 117 |
+
new_tokens,
|
| 118 |
+
{ skip_special_tokens: true },
|
| 119 |
+
);
|
| 120 |
+
console.log(generated_texts[0]);
|
| 121 |
+
// I have a dream that one day this nation will rise up and live out the true meaning of its creed.
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
**Example**: Audio understanding
|
| 126 |
+
|
| 127 |
+
```js
|
| 128 |
+
import { VoxtralForConditionalGeneration, VoxtralProcessor, TextStreamer, read_audio } from "@huggingface/transformers";
|
| 129 |
+
|
| 130 |
+
// Load the processor and model
|
| 131 |
+
const model_id = "onnx-community/Voxtral-Mini-3B-2507-ONNX";
|
| 132 |
+
const processor = await VoxtralProcessor.from_pretrained(model_id);
|
| 133 |
+
const model = await VoxtralForConditionalGeneration.from_pretrained(
|
| 134 |
+
model_id,
|
| 135 |
+
{
|
| 136 |
+
dtype: {
|
| 137 |
+
embed_tokens: "fp16", // "fp32", "fp16", "q8", "q4"
|
| 138 |
+
audio_encoder: "q4", // "fp32", "fp16", "q8", "q4", "q4f16"
|
| 139 |
+
decoder_model_merged: "q4", // "q4", "q4f16"
|
| 140 |
+
},
|
| 141 |
+
device: "webgpu",
|
| 142 |
+
},
|
| 143 |
+
);
|
| 144 |
+
|
| 145 |
+
// Prepare the conversation
|
| 146 |
+
const conversation = [
|
| 147 |
+
{
|
| 148 |
+
"role": "user",
|
| 149 |
+
"content": [
|
| 150 |
+
{ "type": "audio" },
|
| 151 |
+
{ "type": "audio" },
|
| 152 |
+
{ "type": "text", "text": "Describe these two audio clips in detail." },
|
| 153 |
+
],
|
| 154 |
+
}
|
| 155 |
+
];
|
| 156 |
+
const text = processor.apply_chat_template(conversation, { tokenize: false });
|
| 157 |
+
const audio = await Promise.all([
|
| 158 |
+
read_audio("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav", 16000),
|
| 159 |
+
read_audio("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/mlk.wav", 16000),
|
| 160 |
+
]);
|
| 161 |
+
const inputs = await processor(text, audio);
|
| 162 |
+
|
| 163 |
+
// Generate the response
|
| 164 |
+
const generated_ids = await model.generate({
|
| 165 |
+
...inputs,
|
| 166 |
+
max_new_tokens: 256,
|
| 167 |
+
streamer: new TextStreamer(processor.tokenizer, { skip_special_tokens: true, skip_prompt: true }),
|
| 168 |
+
});
|
| 169 |
+
|
| 170 |
+
// Decode the generated tokens
|
| 171 |
+
const new_tokens = generated_ids.slice(null, [inputs.input_ids.dims.at(-1), null]);
|
| 172 |
+
const generated_texts = processor.batch_decode(
|
| 173 |
+
new_tokens,
|
| 174 |
+
{ skip_special_tokens: true },
|
| 175 |
+
);
|
| 176 |
+
console.log(generated_texts[0]);
|
| 177 |
+
// The first audio clip is a speech by a leader, likely a politician or a public figure, addressing a large audience. The speaker begins by encouraging the listeners to ask not what their country can do for them, but what they can do for their country. This is a call to action and a reminder of the individual's responsibility to contribute to the nation's well-being. The second audio clip is a passionate speech by a different leader, possibly a civil rights activist or a community organizer. This speaker expresses a dream of a nation that will rise up and live out the true meaning of its creed, suggesting a vision of a more just and equitable society.
|
| 178 |
+
```
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/chat_template.jinja
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{{- bos_token -}}
|
| 2 |
+
{%- for message in messages -%}
|
| 3 |
+
{%- if (message["role"] == "user") == (loop.index % 2 == 0) -%}
|
| 4 |
+
{{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") -}}
|
| 5 |
+
{%- endif -%}
|
| 6 |
+
{%- if message["role"] == "user" -%}
|
| 7 |
+
{{- "[INST]" -}}
|
| 8 |
+
{%- if message["content"] is string -%}
|
| 9 |
+
{{- message["content"] -}}
|
| 10 |
+
{%- else -%}
|
| 11 |
+
{%- for item in message["content"] -%}
|
| 12 |
+
{%- if item["type"] == "audio" -%}
|
| 13 |
+
{{- "[AUDIO]" -}}
|
| 14 |
+
{%- elif item["type"] == "text" -%}
|
| 15 |
+
{{- item["text"] -}}
|
| 16 |
+
{%- endif -%}
|
| 17 |
+
{%- endfor -%}
|
| 18 |
+
{%- endif -%}
|
| 19 |
+
{{- "[/INST]" -}}
|
| 20 |
+
{%- elif message["role"] == "assistant" -%}
|
| 21 |
+
{%- if message["content"] is not string -%}
|
| 22 |
+
{{- raise_exception("Assistant message content should be a string.") -}}
|
| 23 |
+
{%- endif -%}
|
| 24 |
+
{{- message["content"] + eos_token -}}
|
| 25 |
+
{%- else -%}
|
| 26 |
+
{{- raise_exception("Only user and assistant roles are supported!") -}}
|
| 27 |
+
{%- endif -%}
|
| 28 |
+
{%- endfor -%}
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/config.json
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"VoxtralForConditionalGeneration"
|
| 4 |
+
],
|
| 5 |
+
"audio_config": {
|
| 6 |
+
"activation_dropout": 0.0,
|
| 7 |
+
"activation_function": "gelu",
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_size": 1280,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 5120,
|
| 14 |
+
"layerdrop": 0.0,
|
| 15 |
+
"max_source_positions": 1500,
|
| 16 |
+
"model_type": "voxtral_encoder",
|
| 17 |
+
"num_attention_heads": 20,
|
| 18 |
+
"num_hidden_layers": 32,
|
| 19 |
+
"num_key_value_heads": 20,
|
| 20 |
+
"num_mel_bins": 128,
|
| 21 |
+
"scale_embedding": false,
|
| 22 |
+
"vocab_size": 51866
|
| 23 |
+
},
|
| 24 |
+
"audio_token_id": 24,
|
| 25 |
+
"hidden_size": 3072,
|
| 26 |
+
"model_type": "voxtral",
|
| 27 |
+
"projector_hidden_act": "gelu",
|
| 28 |
+
"text_config": {
|
| 29 |
+
"attention_bias": false,
|
| 30 |
+
"attention_dropout": 0.0,
|
| 31 |
+
"head_dim": 128,
|
| 32 |
+
"hidden_act": "silu",
|
| 33 |
+
"hidden_size": 3072,
|
| 34 |
+
"initializer_range": 0.02,
|
| 35 |
+
"intermediate_size": 8192,
|
| 36 |
+
"max_position_embeddings": 131072,
|
| 37 |
+
"mlp_bias": false,
|
| 38 |
+
"model_type": "llama",
|
| 39 |
+
"num_attention_heads": 32,
|
| 40 |
+
"num_hidden_layers": 30,
|
| 41 |
+
"num_key_value_heads": 8,
|
| 42 |
+
"pretraining_tp": 1,
|
| 43 |
+
"rms_norm_eps": 1e-05,
|
| 44 |
+
"rope_scaling": null,
|
| 45 |
+
"rope_theta": 100000000.0,
|
| 46 |
+
"sliding_window": null,
|
| 47 |
+
"use_cache": true,
|
| 48 |
+
"vocab_size": 131072
|
| 49 |
+
},
|
| 50 |
+
"torch_dtype": "bfloat16",
|
| 51 |
+
"transformers_version": "4.54.0.dev0",
|
| 52 |
+
|
| 53 |
+
"transformers.js_config": {
|
| 54 |
+
"dtype": {
|
| 55 |
+
"embed_tokens": "fp16"
|
| 56 |
+
},
|
| 57 |
+
"kv_cache_dtype": {
|
| 58 |
+
"q4f16": "float16",
|
| 59 |
+
"fp16": "float16"
|
| 60 |
+
},
|
| 61 |
+
"use_external_data_format": {
|
| 62 |
+
"audio_encoder.onnx": 2,
|
| 63 |
+
"audio_encoder_fp16.onnx": 1,
|
| 64 |
+
"audio_encoder_int8.onnx": 1,
|
| 65 |
+
"audio_encoder_uint8.onnx": 1,
|
| 66 |
+
"audio_encoder_quantized.onnx": 1,
|
| 67 |
+
"audio_encoder_q4.onnx": 1,
|
| 68 |
+
"audio_encoder_q4f16.onnx": 1,
|
| 69 |
+
"audio_encoder_bnb4.onnx": 1,
|
| 70 |
+
"decoder_model_merged.onnx": 8,
|
| 71 |
+
"decoder_model_merged_fp16.onnx": 4,
|
| 72 |
+
"decoder_model_merged_q4.onnx": 2,
|
| 73 |
+
"decoder_model_merged_q4f16.onnx": 1,
|
| 74 |
+
"embed_tokens.onnx": 1,
|
| 75 |
+
"embed_tokens_fp16.onnx": 1,
|
| 76 |
+
"embed_tokens_quantized.onnx": 1,
|
| 77 |
+
"embed_tokens_q4.onnx": 1
|
| 78 |
+
}
|
| 79 |
+
},
|
| 80 |
+
"vocab_size": 131072
|
| 81 |
+
}
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 1,
|
| 3 |
+
"eos_token_id": 2,
|
| 4 |
+
"pad_token_id": 11,
|
| 5 |
+
"transformers_version": "4.54.0.dev0"
|
| 6 |
+
}
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47fc3e2c046b8c9ba19dc4ffe195b6606840edc74179e69bd4f7261686cff511
|
| 3 |
+
size 357190
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb1a2baa8eb44a51cbef5cc91ead26a7226c8004d0084b873dedbc4efd1712ba
|
| 3 |
+
size 2095319040
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder.onnx_data_1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2125dbd94d2405064f503c9ca13c1efdddfdd9df2854aeb76e0a2ac77111dc29
|
| 3 |
+
size 553220096
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_bnb4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4418e53b50540e6581cbce958a1d705c31741965ed01fd42346d47254006d6a
|
| 3 |
+
size 404516
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_bnb4.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5652ecf0027b5e65f797c1f444c538e6673940fe9116c769c426890f8d54fd9e
|
| 3 |
+
size 399343616
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_fp16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8367654c6e1b64e799fd581bc607aca3aa77b6bcb0a532a6326a1932aa6728b
|
| 3 |
+
size 359615
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_fp16.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1c5a7fb2d6784483943d30040b2c17d5186cbd5f7ab14e027c3abd7e566a32e
|
| 3 |
+
size 1324269568
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddb743e492888cf8eb3fbc77efbfd6a5154f4b21a39a8d3e22cf29832719d283
|
| 3 |
+
size 600567
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_int8.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf06e83302b02f9fb59751eaf594d8677c1d3d5ae2541d8b28cceedaeeb0f819
|
| 3 |
+
size 669384704
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_q4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38b4e8bc6b7dda75a3c52e358a44a8c4e144b4878ff8867bd0cb41a402d4af81
|
| 3 |
+
size 401545
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_q4.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07537c1791da2d198edca192b7b15c4199c1c70c3eb095d7a432431bc52e1ae5
|
| 3 |
+
size 440238080
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_q4f16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:156a6291053180757636c0caf5f6be3a4e08b90afd93d77646a6bd609ace206e
|
| 3 |
+
size 403958
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_q4f16.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62eba37624325b3b21c51edf6c6823aaa8ce9657979dbffc31751c7f0d8d9709
|
| 3 |
+
size 383696896
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_quantized.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4db2e57b57f836f3f12deecba456785c888ddc4768cbe1d6c28e379ee479dc79
|
| 3 |
+
size 603112
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_quantized.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29a112e764c22a77d20fe5b4925c709a2188a3e5264ef76116420db469fb7fdf
|
| 3 |
+
size 669384704
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_uint8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:644e50318802c691623b85fd940ac60164cd904b72d1d81f658fbbab22c6506d
|
| 3 |
+
size 601155
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/audio_encoder_uint8.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29a112e764c22a77d20fe5b4925c709a2188a3e5264ef76116420db469fb7fdf
|
| 3 |
+
size 669384704
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e101f5f00fad06ff4de9ba06a18e529b1b238c77cecbaf7a83f2dd21f336209
|
| 3 |
+
size 226717
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f08d2cb408774380ae5f25aedb2b29f08b2a67167bfaddb83833127d0988272d
|
| 3 |
+
size 2005000192
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8824af4dd1a6bda4c348924701d2b78e9b9b982eee6e79c00c311fbe71436720
|
| 3 |
+
size 2038554624
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:906367bee2870dedf649e48c59cbba1602b0c009b80a2fe75d1e3ead80ed4eee
|
| 3 |
+
size 2088873984
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f0dd468461fb57d33b5a3cca33442bcf7f53c73d9a5818419d0cdb16c6340e5
|
| 3 |
+
size 2063720448
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e8db09e022033239103c7ab042e8297b0768d0c4566d6fa9b64611dd62f3c13
|
| 3 |
+
size 2038542336
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:531cd0d701944232b7487648b7154e19fab693b9ee120469543362e380923fb8
|
| 3 |
+
size 2038554624
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_6
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19ce9b0777369a103d6c6f2700909b11c3ed9ac8c17ced14730829482d3d4a6a
|
| 3 |
+
size 629182464
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged.onnx_data_7
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:152030f3a13ed418571c431be4275b634eb40ea554ef74f917526ee896413962
|
| 3 |
+
size 1610612736
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c77e254b27448c04b6f027c8040ce4f2429f999453b496ea2af8e79aa4a9c195
|
| 3 |
+
size 228186
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ae4f71eb0994d28e915b5a5e12c3b1bbfb81b7ced25be4bd6698d60e71cb276
|
| 3 |
+
size 2072109056
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data_1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f207b9da920da6f837d35f38611c65ef3e28e089b91d19272273c4deac797204
|
| 3 |
+
size 2088886272
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data_2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf5cab82a3871665c485cf00d199aa55cac4249487219928cb0632527ea7228f
|
| 3 |
+
size 2076297216
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_fp16.onnx_data_3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28b309c4dde22701d67a210b1c90bca5ae143fc2de33b98ed43ef14488880fa6
|
| 3 |
+
size 1019228160
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25b33b24b1737410defd6035e84664f3810d73b1591b4f350dda1911910c0175
|
| 3 |
+
size 306657
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:224f9159d5c1843babd6cc99f220c14ce42f7400c56a1cf5102adea8ec029b08
|
| 3 |
+
size 2073260032
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4.onnx_data_1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdd87806c69de54c19576e425639109e2961196c4618843e0c0ce389af93b531
|
| 3 |
+
size 251658240
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4f16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00ef33f598043ce7640b2353ca13511bb40c2ce0ecc049cc4930f8fa515cd8c2
|
| 3 |
+
size 308330
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/decoder_model_merged_q4f16.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35f8d88fe9d0af7759bbcd7e34843687e8768a4088c229757601aa6a1be1dc2a
|
| 3 |
+
size 2065283072
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1b1350a79d62150ebb3899cbf824ccf74820b94ca23b9f0ff67b69834499944
|
| 3 |
+
size 299
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3801b085f29d922117bcc4e0c484f1652de50b015d8ff6721303584a0d3e914f
|
| 3 |
+
size 1610612736
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_fp16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed64cab8592c4731ac4c8b4fc59baad6756f040a642390ee07c7e9c9ec56879a
|
| 3 |
+
size 494
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_fp16.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f8ec7534e487edaec0d707980ecf4cd4bf66db7ecc6ef8cfb5f717a565a0616
|
| 3 |
+
size 805306368
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_q4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0aced72ee68efcb5feaccbe5d3bb454e8a6d44cec9cfd6f5aece7272255d43a3
|
| 3 |
+
size 542
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_q4.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a2dbc92241c261aa99b0fb920ccfdcf9c4f2d3b97ed51c06d670c99e633e594
|
| 3 |
+
size 251658240
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_quantized.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:198b066c7f5bef2776e001934ac2eff76ea89e93d072d577ace5ba16ab08281e
|
| 3 |
+
size 552
|
de,en,es,fr,hi,it,nl,pt/Voxtral-Mini-3B-2507-ONNX/onnx/embed_tokens_quantized.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:681ef4fdd1f9e7857872405b174dc68955cd75428e35591376c2630f7b5bdc01
|
| 3 |
+
size 402653184
|