{
  "_class_name": "SanaTransformer2DModel",
  "_diffusers_version": "0.34.0.dev0",
  "_name_or_path": "Efficient-Large-Model/Sana_600M_1024px_diffusers",
  "attention_bias": false,
  "attention_head_dim": 32,
  "caption_channels": 2304,
  "cross_attention_dim": 1152,
  "cross_attention_head_dim": 72,
  "dropout": 0.0,
  "guidance_embeds": false,
  "guidance_embeds_scale": 0.1,
  "in_channels": 32,
  "interpolation_scale": null,
  "mlp_ratio": 2.5,
  "norm_elementwise_affine": false,
  "norm_eps": 1e-06,
  "num_attention_heads": 36,
  "num_cross_attention_heads": 16,
  "num_layers": 28,
  "out_channels": 32,
  "patch_size": 1,
  "qk_norm": null,
  "quantization_config": {
    "algorithm": "max",
    "block_quantize": null,
    "channel_quantize": null,
    "modelopt_config": {
      "algorithm": "max",
      "quant_cfg": {
        "*block_sparse_moe.gate*": {
          "enable": false
        },
        "*input_quantizer": {
          "enable": false
        },
        "*k_bmm_quantizer": {
          "enable": false
        },
        "*lm_head*": {
          "enable": false
        },
        "*mlp.gate.*": {
          "enable": false
        },
        "*mlp.shared_expert_gate.*": {
          "enable": false
        },
        "*output_layer*": {
          "enable": false
        },
        "*output_quantizer": {
          "enable": false
        },
        "*proj_out.*": {
          "enable": false
        },
        "*q_bmm_quantizer": {
          "enable": false
        },
        "*router*": {
          "enable": false
        },
        "*softmax_quantizer": {
          "enable": false
        },
        "*v_bmm_quantizer": {
          "enable": false
        },
        "*weight_quantizer": {
          "fake_quant": false,
          "num_bits": [
            4,
            3
          ]
        },
        "default": {
          "enable": false
        },
        "nn.BatchNorm1d": {
          "*": {
            "enable": false
          }
        },
        "nn.BatchNorm2d": {
          "*": {
            "enable": false
          }
        },
        "nn.BatchNorm3d": {
          "*": {
            "enable": false
          }
        },
        "nn.LeakyReLU": {
          "*": {
            "enable": false
          }
        },
        "output.*": {
          "enable": false
        }
      }
    },
    "modules_to_not_convert": null,
    "quant_method": "modelopt",
    "quant_type": "FP8",
    "type_bit_map": {
      "FP8": [
        4,
        3
      ],
      "INT4": 4,
      "NVFP4": [
        2,
        1
      ]
    },
    "weight_only": true
  },
  "sample_size": 32,
  "timestep_scale": 1.0
}