{ "name": "SenseVoiceSmall-coreml", "upstream": { "model": "FunAudioLLM/SenseVoiceSmall", "code": "https://github.com/FunAudioLLM/SenseVoice", "model_revision": "3eb3b4eeffc2f2dde6051b853983753db33e35c3", "code_commit": "b97857e5c807a466d0a062cf84dba9fe38a1d66a", "assets": [ { "name": "cmvn_am.mvn", "url": "https://modelscope.cn/api/v1/models/iic/SenseVoiceSmall/repo?Revision=master&FilePath=am.mvn", "sha256": "29b3c740a2c0cfc6b308126d31d7f265fa2be74f3bb095cd2f143ea970896ae5", "bytes": 11203 }, { "name": "spm", "url": "https://modelscope.cn/api/v1/models/iic/SenseVoiceSmall/repo?Revision=master&FilePath=chn_jpn_yue_eng_ko_spectok.bpe.model", "sha256": "aa87f86064c3730d799ddf7af3c04659151102cba548bce325cf06ba4da4e6a8", "bytes": 377341 } ] }, "artifacts": { "coreml_zip": "coreml/SenseVoiceSmall.mlmodelc.zip" }, "coreml": { "format": "mlmodelc", "inputs": [ { "name": "speech", "dtype": "float32", "shape": [ 1, "T", 560 ] }, { "name": "speech_lengths", "dtype": "int32", "shape": [ 1 ] }, { "name": "language", "dtype": "int32", "shape": [ 1 ] }, { "name": "textnorm", "dtype": "int32", "shape": [ 1 ] } ], "outputs": [ { "name": "ctc_logits" }, { "name": "encoder_out_lens" } ] }, "decoding": { "ctc_blank_id": 0, "token_offset": 0 }, "notes": [ "Do not run a single forward pass on extremely long audio; use VAD/chunking." ], "build": { "artifact": "coreml/SenseVoiceSmall.mlmodelc.zip", "sha256": "880711fa03577363e6c1b1b6e9321f130ea1a53d5c065d92e1abd8a431bad6be", "bytes": 432164139, "xcodebuild_version": "Xcode 26.0\nBuild version 17A324" } }