File size: 1,630 Bytes
e1e2753
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
DiT(
  (x_embedder): PatchEmbed(
    (proj): Conv2d(32, 1152, kernel_size=(1, 1), stride=(1, 1))
    (norm): Identity()
  )
  (t_embedder): TimestepEmbedder(
    (mlp): Sequential(
      (0): Linear(in_features=256, out_features=1152, bias=True)
      (1): SiLU()
      (2): Linear(in_features=1152, out_features=1152, bias=True)
    )
  )
  (y_embedder): LabelEmbedder(
    (embedding_table): Embedding(1001, 1152)
  )
  (blocks): ModuleList(
    (0-27): 28 x DiTBlock(
      (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
      (attn): Attention(
        (qkv): Linear(in_features=1152, out_features=3456, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1152, out_features=1152, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
      (mlp): Mlp(
        (fc1): Linear(in_features=1152, out_features=4608, bias=True)
        (act): GELU(approximate='tanh')
        (drop1): Dropout(p=0, inplace=False)
        (fc2): Linear(in_features=4608, out_features=1152, bias=True)
        (drop2): Dropout(p=0, inplace=False)
      )
      (adaLN_modulation): Sequential(
        (0): SiLU()
        (1): Linear(in_features=1152, out_features=6912, bias=True)
      )
    )
  )
  (final_layer): FinalLayer(
    (norm_final): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
    (linear): Linear(in_features=1152, out_features=64, bias=True)
    (adaLN_modulation): Sequential(
      (0): SiLU()
      (1): Linear(in_features=1152, out_features=2304, bias=True)
    )
  )
)