{
  "patch_size": [
    1,
    2,
    2
  ],
  "num_attention_heads": 12,
  "attention_head_dim": 128,
  "in_channels": 16,
  "out_channels": 16,
  "text_dim": 4096,
  "freq_dim": 256,
  "ffn_dim": 8960,
  "num_layers": 30,
  "cross_attn_norm": true,
  "qk_norm": "rms_norm_across_heads",
  "eps": 1e-06,
  "image_dim": null,
  "mel_dim": null,
  "added_kv_proj_dim": 1536,
  "context_pre_only": true,
  "rope_max_seq_len": 1024,
  "rope_max_seq_len_text": 2048,
  "mel_num_attention_heads": 20,
  "mel_in_channels": 8,
  "mlp_ratio": 2.5,
  "mel_out_channels": 8,
  "max_position": 32768,
  "rope_theta": 1000000.0,
  "speaker_embedding_dim": 512,
  "text_embedding_dim": 768,
  "ssl_encoder_depths": [
    12,
    12
  ],
  "ssl_names": [
    "mert",
    "m-hubert"
  ],
  "ssl_latent_dims": [
    1024,
    768
  ],
  "lyric_encoder_vocab_size": 6693,
  "lyric_hidden_size": 1024,
  "mel_patch_size": [
    16,
    1
  ],
  "max_height": 16,
  "max_width": 32768,
  "max_speech_token_num": 128,
  "_use_default_values": [
    "mel_dim"
  ],
  "_class_name": "WanTransformer3DModel",
  "_diffusers_version": "0.33.0.dev0"
}