{ "model_type": "cosyvoice3", "version": "Fun-CosyVoice3-0.5B-2512", "llm": { "hidden_size": 896, "num_hidden_layers": 24, "num_attention_heads": 14, "num_key_value_heads": 2, "intermediate_size": 4864, "head_dim": 64, "max_position_embeddings": 32768, "vocab_size": 151936, "rms_norm_eps": 1e-06, "rope_theta": 1000000.0, "tie_word_embeddings": true, "speech_token_size": 6561, "text_token_size": 151936 }, "flow": { "input_size": 512, "output_size": 80, "vocab_size": 6561, "spk_embed_dim": 192, "token_frame_rate": 25, "token_mel_ratio": 2, "pre_lookahead_len": 3, "dit": { "dim": 1024, "depth": 22, "heads": 16, "dim_head": 64, "ff_mult": 2, "mel_dim": 80, "spk_dim": 80, "static_chunk_size": 50 } }, "hifigan": { "sampling_rate": 24000, "in_channels": 80, "base_channels": 512, "nb_harmonics": 8, "upsample_rates": [ 8, 5, 3 ], "upsample_kernel_sizes": [ 16, 11, 7 ], "istft_n_fft": 16, "istft_hop_len": 4, "resblock_kernel_sizes": [ 3, 7, 11 ], "resblock_dilation_sizes": [ [ 1, 3, 5 ], [ 1, 3, 5 ], [ 1, 3, 5 ] ], "source_resblock_kernel_sizes": [ 7, 7, 11 ], "nsf_alpha": 0.1, "nsf_sigma": 0.003, "nsf_voiced_threshold": 10, "audio_limit": 0.99 }, "mel": { "n_fft": 1920, "num_mels": 80, "hop_size": 480, "win_size": 1920, "sample_rate": 24000 }, "tokenizer": { "type": "fsq", "codebook_size": 6561, "frame_rate": 25 }, "quantization": { "bits": 4, "group_size": 64, "quantized_layers": [ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "speech_head" ] } }