{
  "model_version": "2.3.0",
  "is_v2": true,
  "model_type": "AudioVideo",
  "num_attention_heads": 32,
  "attention_head_dim": 128,
  "in_channels": 128,
  "out_channels": 128,
  "num_layers": 48,
  "cross_attention_dim": 4096,
  "caption_channels": null,
  "apply_gated_attention": true,
  "audio_num_attention_heads": 32,
  "audio_attention_head_dim": 64,
  "audio_in_channels": 128,
  "audio_out_channels": 128,
  "audio_cross_attention_dim": 2048,
  "positional_embedding_theta": 10000.0,
  "positional_embedding_max_pos": [20, 2048, 2048],
  "audio_positional_embedding_max_pos": [20],
  "timestep_scale_multiplier": 1000,
  "av_ca_timestep_scale_multiplier": 1000,
  "norm_eps": 1e-06,
  "connector_positional_embedding_max_pos": [4096],
  "connector_rope_type": "SPLIT",
  "variants": {
    "distilled": {
      "cross_attention_adaln": true
    },
    "dev": {
      "cross_attention_adaln": true
    }
  }
}