Wan2_2Transformer3DModel(
  (patch_embedding): Conv3d(36, 5120, kernel_size=(1, 2, 2), stride=(1, 2, 2))
  (text_embedding): Sequential(
    (0): Linear(in_features=4096, out_features=5120, bias=True)
    (1): GELU(approximate='tanh')
    (2): Linear(in_features=5120, out_features=5120, bias=True)
  )
  (time_embedding): Sequential(
    (0): Linear(in_features=256, out_features=5120, bias=True)
    (1): SiLU()
    (2): Linear(in_features=5120, out_features=5120, bias=True)
  )
  (time_projection): Sequential(
    (0): SiLU()
    (1): Linear(in_features=5120, out_features=30720, bias=True)
  )
  (blocks): ModuleList(
    (0-39): 40 x WanAttentionBlock(
      (norm1): WanLayerNorm((5120,), eps=1e-06, elementwise_affine=False)
      (self_attn): WanSelfAttention(
        (q): Linear(in_features=5120, out_features=5120, bias=True)
        (k): Linear(in_features=5120, out_features=5120, bias=True)
        (v): Linear(in_features=5120, out_features=5120, bias=True)
        (o): Linear(in_features=5120, out_features=5120, bias=True)
        (norm_q): WanRMSNorm()
        (norm_k): WanRMSNorm()
      )
      (norm3): WanLayerNorm((5120,), eps=1e-06, elementwise_affine=True)
      (cross_attn): WanCrossAttention(
        (q): Linear(in_features=5120, out_features=5120, bias=True)
        (k): Linear(in_features=5120, out_features=5120, bias=True)
        (v): Linear(in_features=5120, out_features=5120, bias=True)
        (o): Linear(in_features=5120, out_features=5120, bias=True)
        (norm_q): WanRMSNorm()
        (norm_k): WanRMSNorm()
      )
      (norm2): WanLayerNorm((5120,), eps=1e-06, elementwise_affine=False)
      (ffn): Sequential(
        (0): Linear(in_features=5120, out_features=13824, bias=True)
        (1): GELU(approximate='tanh')
        (2): Linear(in_features=13824, out_features=5120, bias=True)
      )
    )
  )
  (head): Head(
    (norm): WanLayerNorm((5120,), eps=1e-06, elementwise_affine=False)
    (head): Linear(in_features=5120, out_features=64, bias=True)
  )
)
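
Note (not part of the dump above): a minimal back-of-the-envelope sketch of the parameter count implied by the printed shapes. It counts only the Linear and Conv3d weights and biases shown in the repr; norm weights and any modulation tables are omitted, so the total is approximate. The helper name linear_params is illustrative, not from the model code.

def linear_params(n_in, n_out, bias=True):
    # weight matrix plus optional bias vector
    return n_in * n_out + (n_out if bias else 0)

d, ffn, n_blocks = 5120, 13824, 40

attn = 4 * linear_params(d, d)                                     # q, k, v, o projections
block = 2 * attn + linear_params(d, ffn) + linear_params(ffn, d)   # self-attn + cross-attn + FFN

stem = (
    36 * d * (1 * 2 * 2) + d                                       # patch_embedding Conv3d weight + bias
    + linear_params(4096, d) + linear_params(d, d)                 # text_embedding
    + linear_params(256, d) + linear_params(d, d)                  # time_embedding
    + linear_params(d, 30720)                                      # time_projection
)
head = linear_params(d, 64)

total = n_blocks * block + stem + head
print(f"~{total / 1e9:.1f}B parameters")                           # ~14.3B from these shapes alone

Running this gives roughly 14.3B parameters, which is in line with a 14B-class Wan 2.2 transformer.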