{
"name": "SenseVoiceSmall-coreml",
"upstream": {
"model": "FunAudioLLM/SenseVoiceSmall",
"code": "https://github.com/FunAudioLLM/SenseVoice",
"model_revision": "3eb3b4eeffc2f2dde6051b853983753db33e35c3",
"code_commit": "b97857e5c807a466d0a062cf84dba9fe38a1d66a",
"assets": [
{
"name": "cmvn_am.mvn",
"url": "https://modelscope.cn/api/v1/models/iic/SenseVoiceSmall/repo?Revision=master&FilePath=am.mvn",
"sha256": "29b3c740a2c0cfc6b308126d31d7f265fa2be74f3bb095cd2f143ea970896ae5",
"bytes": 11203
},
{
"name": "spm",
"url": "https://modelscope.cn/api/v1/models/iic/SenseVoiceSmall/repo?Revision=master&FilePath=chn_jpn_yue_eng_ko_spectok.bpe.model",
"sha256": "aa87f86064c3730d799ddf7af3c04659151102cba548bce325cf06ba4da4e6a8",
"bytes": 377341
}
]
},
"artifacts": {
"coreml_zip": "coreml/SenseVoiceSmall.mlmodelc.zip"
},
"coreml": {
"format": "mlmodelc",
"inputs": [
{
"name": "speech",
"dtype": "float32",
"shape": [
1,
"T",
560
]
},
{
"name": "speech_lengths",
"dtype": "int32",
"shape": [
1
]
},
{
"name": "language",
"dtype": "int32",
"shape": [
1
]
},
{
"name": "textnorm",
"dtype": "int32",
"shape": [
1
]
}
],
"outputs": [
{
"name": "ctc_logits"
},
{
"name": "encoder_out_lens"
}
]
},
"decoding": {
"ctc_blank_id": 0,
"token_offset": 0
},
"notes": [
"Do not run a single forward pass on extremely long audio; use VAD/chunking."
],
"build": {
"artifact": "coreml/SenseVoiceSmall.mlmodelc.zip",
"sha256": "880711fa03577363e6c1b1b6e9321f130ea1a53d5c065d92e1abd8a431bad6be",
"bytes": 432164139,
"xcodebuild_version": "Xcode 26.0\nBuild version 17A324"
}
}