{
  "model_type": "funasr",
  "sample_rate": 16000,
  "n_mels": 80,
  "lfr_m": 7,
  "lfr_n": 6,
  "encoder": {
    "input_dim": 560,
    "encoder_dim": 512,
    "num_heads": 4,
    "ffn_dim": 2048,
    "kernel_size": 11,
    "num_encoders0": 1,
    "num_encoders": 49,
    "num_tp_encoders": 20,
    "dropout": 0.0
  },
  "adaptor": {
    "downsample_rate": 1,
    "encoder_dim": 512,
    "llm_dim": 1024,
    "ffn_dim": 2048,
    "n_layer": 2,
    "attention_heads": 8,
    "dropout": 0.0
  },
  "llm": {
    "vocab_size": 151936,
    "hidden_size": 1024,
    "num_hidden_layers": 28,
    "num_attention_heads": 16,
    "num_key_value_heads": 8,
    "intermediate_size": 3072,
    "max_position_embeddings": 40960,
    "rope_theta": 1000000.0,
    "rms_norm_eps": 1e-06,
    "tie_word_embeddings": false,
    "head_dim": 128
  },
  "quantization": {
    "bits": 4,
    "group_size": 64,
    "quantized_components": [
      "llm.model.layers",
      "audio_adaptor"
    ]
  }
}