File size: 1,505 Bytes
bc0b7e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
{
  "adain": true,
  "bottleneck_type": "rq",
  "checkpointing": false,
  "code_shape": [
    32,
    32,
    1
  ],
  "connect_list": [
    "32",
    "64",
    "128",
    "256"
  ],
  "ddconfig": {
    "attn_resolutions": [
      32,
      64,
      128
    ],
    "ch": 64,
    "ch_mult": [
      1,
      2,
      4,
      4,
      8
    ],
    "depths": [
      2,
      2,
      2,
      2,
      2
    ],
    "double_z": false,
    "dropout": 0.0,
    "in_channels": 3,
    "num_frames": 3,
    "num_head": 8,
    "num_heads": [
      8,
      8,
      8,
      8,
      8
    ],
    "num_res_blocks": 1,
    "out_ch": 3,
    "resolution": 512,
    "stages_atten": 4,
    "window_size": [
      5,
      5,
      5
    ],
    "window_sizes": [
      [
        4,
        4
      ],
      [
        4,
        4
      ],
      [
        4,
        4
      ],
      [
        4,
        4
      ],
      [
        4,
        4
      ]
    ],
    "z_channels": 256
  },
  "decay": 0.99,
  "detach_16": true,
  "dim_embd": 512,
  "droprate": 0.0,
  "embed_dim": 512,
  "fix_modules": [
    "quantizer",
    "decoder",
    "conditionnet"
  ],
  "latent_loss_weight": 0.25,
  "latent_shape": [
    32,
    32,
    512
  ],
  "loss_type": "mse",
  "n_embed": 1024,
  "n_head": 8,
  "n_layers": 9,
  "restart_unused_codes": true,
  "shared_codebook": true,
  "tf": 3,
  "type": "PGTFormer",
  "w": 1
}