{ "metadata": { "total_size": 37752349184 }, "weight_map": { "audio_caption_projection.linear_1.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_caption_projection.linear_1.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_caption_projection.linear_2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_caption_projection.linear_2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_proj_in.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_proj_in.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_proj_out.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "audio_proj_out.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_time_embed.emb.timestep_embedder.linear_1.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_time_embed.emb.timestep_embedder.linear_1.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_time_embed.emb.timestep_embedder.linear_2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_time_embed.emb.timestep_embedder.linear_2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_time_embed.linear.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "audio_time_embed.linear.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_scale_shift.emb.timestep_embedder.linear_1.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_scale_shift.emb.timestep_embedder.linear_1.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_scale_shift.emb.timestep_embedder.linear_2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_scale_shift.emb.timestep_embedder.linear_2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_scale_shift.linear.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_scale_shift.linear.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_v2a_gate.emb.timestep_embedder.linear_1.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_v2a_gate.emb.timestep_embedder.linear_1.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_v2a_gate.emb.timestep_embedder.linear_2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_v2a_gate.emb.timestep_embedder.linear_2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_v2a_gate.linear.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_audio_v2a_gate.linear.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_a2v_gate.emb.timestep_embedder.linear_1.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_a2v_gate.emb.timestep_embedder.linear_1.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_a2v_gate.emb.timestep_embedder.linear_2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_a2v_gate.emb.timestep_embedder.linear_2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_a2v_gate.linear.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_a2v_gate.linear.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_scale_shift.emb.timestep_embedder.linear_1.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_scale_shift.emb.timestep_embedder.linear_1.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_scale_shift.emb.timestep_embedder.linear_2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_scale_shift.emb.timestep_embedder.linear_2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_scale_shift.linear.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "av_cross_attn_video_scale_shift.linear.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "caption_projection.linear_1.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "caption_projection.linear_1.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "caption_projection.linear_2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "caption_projection.linear_2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "proj_in.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "proj_in.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "proj_out.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "proj_out.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "time_embed.emb.timestep_embedder.linear_1.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "time_embed.emb.timestep_embedder.linear_1.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "time_embed.emb.timestep_embedder.linear_2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "time_embed.emb.timestep_embedder.linear_2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "time_embed.linear.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "time_embed.linear.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.0.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.1.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.10.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.11.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.12.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.12.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.12.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.12.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.12.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.12.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.12.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.12.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.12.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.12.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.13.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.14.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.15.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.16.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.17.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.18.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.19.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.2.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.2.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.20.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.20.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.21.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.22.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.23.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn1.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn1.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn1.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn1.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn1.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn1.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn2.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn2.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn2.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn2.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn2.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_attn2.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.24.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.24.audio_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.ff.net.0.proj.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.ff.net.0.proj.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.ff.net.2.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.ff.net.2.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.24.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00002-of-00004.safetensors", "transformer_blocks.25.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.25.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.26.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.27.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.28.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.29.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.3.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.3.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.30.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.30.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.31.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.32.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.33.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.34.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.35.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.ff.net.2.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.ff.net.2.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.36.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn1.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn1.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn1.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn1.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn1.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn1.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn2.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn2.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn2.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn2.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn2.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_attn2.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.37.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.37.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.37.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.37.audio_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.ff.net.0.proj.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.ff.net.0.proj.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.37.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.37.scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.37.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00003-of-00004.safetensors", "transformer_blocks.38.attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.38.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.39.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.4.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.4.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.40.attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.40.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.41.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.42.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.43.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.44.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.45.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.46.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn1.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn1.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn1.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn1.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn1.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn1.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn2.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn2.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn2.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn2.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn2.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_attn2.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.ff.net.0.proj.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.ff.net.0.proj.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.ff.net.2.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.ff.net.2.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.47.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00004-of-00004.safetensors", "transformer_blocks.5.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.5.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.6.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.7.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.8.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn1.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn1.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn1.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn1.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn1.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn1.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn1.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn1.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn1.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn1.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn2.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn2.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn2.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn2.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn2.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn2.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn2.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn2.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn2.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_attn2.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_to_video_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_to_video_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_to_video_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_to_video_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_to_video_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_to_video_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_to_video_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_to_video_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_to_video_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.audio_to_video_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.ff.net.0.proj.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.ff.net.0.proj.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.ff.net.2.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.ff.net.2.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_a2v_cross_attn_scale_shift_table": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_to_audio_attn.norm_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_to_audio_attn.norm_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_to_audio_attn.to_k.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_to_audio_attn.to_k.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_to_audio_attn.to_out.0.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_to_audio_attn.to_out.0.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_to_audio_attn.to_q.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_to_audio_attn.to_q.weight": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_to_audio_attn.to_v.bias": "diffusion_pytorch_model-00001-of-00004.safetensors", "transformer_blocks.9.video_to_audio_attn.to_v.weight": "diffusion_pytorch_model-00001-of-00004.safetensors" } }