{ "model_type": "funasr", "sample_rate": 16000, "n_mels": 80, "lfr_m": 7, "lfr_n": 6, "encoder": { "input_dim": 560, "encoder_dim": 512, "num_heads": 4, "ffn_dim": 2048, "kernel_size": 11, "num_encoders0": 1, "num_encoders": 49, "num_tp_encoders": 20, "dropout": 0.0 }, "adaptor": { "downsample_rate": 1, "encoder_dim": 512, "llm_dim": 1024, "ffn_dim": 2048, "n_layer": 2, "attention_heads": 8, "dropout": 0.0 }, "llm": { "vocab_size": 151936, "hidden_size": 1024, "num_hidden_layers": 28, "num_attention_heads": 16, "num_key_value_heads": 8, "intermediate_size": 3072, "max_position_embeddings": 40960, "rope_theta": 1000000.0, "rms_norm_eps": 1e-06, "tie_word_embeddings": false, "head_dim": 128 }, "quantization": { "bits": 4, "group_size": 64, "quantized_components": [ "llm.model.layers", "audio_adaptor" ] } }