qwen3-asr-0.6b-coreml / f32 /metadata.json
Alex-Wengg
refactor: clean directory structure (f32/, int8/) and remove legacy embedding model
48d1bc2
{
"model_id": "Qwen/Qwen3-ASR-0.6B",
"architecture": "Qwen3ASRForConditionalGeneration",
"sample_rate": 16000,
"num_mel_bins": 128,
"max_audio_seconds": 30.0,
"max_seq_length": 4096,
"audio_encoder": {
"n_window": 50,
"n_window_infer": 800,
"mel_window_size": 100,
"conv_downsample_factor": 8,
"d_model": 896,
"output_dim": 1024,
"num_layers": 18,
"num_heads": 14
},
"text_decoder": {
"hidden_size": 1024,
"intermediate_size": 3072,
"num_layers": 28,
"num_attention_heads": 16,
"num_kv_heads": 8,
"head_dim": 128,
"vocab_size": 151936,
"rope_theta": 1000000,
"mrope_section": [24, 20, 20]
},
"special_tokens": {
"audio_start_token_id": 151669,
"audio_end_token_id": 151670,
"audio_token_id": 151676,
"im_start_token_id": 151644,
"im_end_token_id": 151645,
"system_token_id": 8948,
"user_token_id": 872,
"assistant_token_id": 77091,
"newline_token_id": 198,
"eos_token_ids": [151645, 151643]
},
"components": {
"audio_encoder": {
"path": "qwen3_asr_audio_encoder.mlpackage",
"precision": "float16"
},
"embedding": {
"path": "qwen3_asr_embedding.mlpackage",
"precision": "float16"
},
"decoder_stack": {
"path": "qwen3_asr_decoder_stack.mlpackage",
"num_layers": 28,
"weight_precision": "float32",
"compute_precision": "float32",
"note": "All 28 decoder layers consolidated into one model with stacked KV caches. Full float32 weights and compute."
},
"lm_head": {
"path": "qwen3_asr_lm_head.mlpackage",
"weight_precision": "float32",
"compute_precision": "float32"
}
},
"export_settings": {
"compute_units": "CPU_ONLY",
"deployment_target": "iOS17"
}
}