{
  "_gradient_checkpointing": true,
  "as_latents": false,
  "connector_num_hidden_layers": 6,
  "connector_qk_norm": false,
  "diffusion_model": "sana",
  "dim": 1536,
  "double_mllm": false,
  "ema_decay": 0.999,
  "encoder_id": "google/siglip2-so400m-patch16-512",
  "ffn_dim_multiplier": null,
  "from_scratch": false,
  "in_channels": 32,
  "input_size": 16,
  "latent_embedding_size": 1152,
  "learn_sigma": false,
  "load_lora": true,
  "lora_ckpt": "Hyper-SD15-1step-lora.safetensors",
  "lora_repo": "ByteDance/Hyper-SD",
  "loss_type": "flow",
  "max_str_length": 1024,
  "mllm_layers": null,
  "modules_to_freeze": [
    "vae",
    "encoder"
  ],
  "modules_to_unfreeze": [],
  "multiple_of": 256,
  "n_heads": 32,
  "n_kv_heads": 8,
  "n_layers": 16,
  "noise_scheduler_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers",
  "norm_eps": 1e-05,
  "num_pooled_tokens": 64,
  "patch_size": 2,
  "pooler_output": false,
  "pre_proj": true,
  "qk_norm": true,
  "rope": true,
  "scheduler_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers",
  "system_prompt": "You will be given an image or its caption. Please describe the content of the image in detail in your own words.",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0.dev0",
  "unet_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers",
  "use_ema": false,
  "use_norm": false,
  "vae_downsample_f": 32,
  "vae_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers"
}