| { | |
| "model_type": "cosyvoice3", | |
| "version": "Fun-CosyVoice3-0.5B-2512", | |
| "llm": { | |
| "hidden_size": 896, | |
| "num_hidden_layers": 24, | |
| "num_attention_heads": 14, | |
| "num_key_value_heads": 2, | |
| "intermediate_size": 4864, | |
| "head_dim": 64, | |
| "max_position_embeddings": 32768, | |
| "vocab_size": 151936, | |
| "rms_norm_eps": 1e-06, | |
| "rope_theta": 1000000.0, | |
| "tie_word_embeddings": true, | |
| "speech_token_size": 6561, | |
| "text_token_size": 151936 | |
| }, | |
| "flow": { | |
| "input_size": 512, | |
| "output_size": 80, | |
| "vocab_size": 6561, | |
| "spk_embed_dim": 192, | |
| "token_frame_rate": 25, | |
| "token_mel_ratio": 2, | |
| "pre_lookahead_len": 3, | |
| "dit": { | |
| "dim": 1024, | |
| "depth": 22, | |
| "heads": 16, | |
| "dim_head": 64, | |
| "ff_mult": 2, | |
| "mel_dim": 80, | |
| "spk_dim": 80, | |
| "static_chunk_size": 50 | |
| } | |
| }, | |
| "hifigan": { | |
| "sampling_rate": 24000, | |
| "in_channels": 80, | |
| "base_channels": 512, | |
| "nb_harmonics": 8, | |
| "upsample_rates": [ | |
| 8, | |
| 5, | |
| 3 | |
| ], | |
| "upsample_kernel_sizes": [ | |
| 16, | |
| 11, | |
| 7 | |
| ], | |
| "istft_n_fft": 16, | |
| "istft_hop_len": 4, | |
| "resblock_kernel_sizes": [ | |
| 3, | |
| 7, | |
| 11 | |
| ], | |
| "resblock_dilation_sizes": [ | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ] | |
| ], | |
| "source_resblock_kernel_sizes": [ | |
| 7, | |
| 7, | |
| 11 | |
| ], | |
| "nsf_alpha": 0.1, | |
| "nsf_sigma": 0.003, | |
| "nsf_voiced_threshold": 10, | |
| "audio_limit": 0.99 | |
| }, | |
| "mel": { | |
| "n_fft": 1920, | |
| "num_mels": 80, | |
| "hop_size": 480, | |
| "win_size": 1920, | |
| "sample_rate": 24000 | |
| }, | |
| "tokenizer": { | |
| "type": "fsq", | |
| "codebook_size": 6561, | |
| "frame_rate": 25 | |
| }, | |
| "quantization": { | |
| "bits": 4, | |
| "group_size": 64, | |
| "quantized_layers": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj", | |
| "gate_proj", | |
| "up_proj", | |
| "down_proj", | |
| "speech_head" | |
| ] | |
| } | |
| } |