{ "_gradient_checkpointing": true, "as_latents": false, "connector_num_hidden_layers": 6, "connector_qk_norm": false, "diffusion_model": "sana", "dim": 1536, "double_mllm": false, "ema_decay": 0.999, "encoder_id": "google/siglip2-so400m-patch16-512", "ffn_dim_multiplier": null, "from_scratch": false, "in_channels": 32, "input_size": 16, "latent_embedding_size": 1152, "learn_sigma": false, "load_lora": true, "lora_ckpt": "Hyper-SD15-1step-lora.safetensors", "lora_repo": "ByteDance/Hyper-SD", "loss_type": "flow", "max_str_length": 1024, "mllm_layers": null, "modules_to_freeze": [ "vae", "encoder" ], "modules_to_unfreeze": [], "multiple_of": 256, "n_heads": 32, "n_kv_heads": 8, "n_layers": 16, "noise_scheduler_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers", "norm_eps": 1e-05, "num_pooled_tokens": 64, "patch_size": 2, "pooler_output": false, "pre_proj": true, "qk_norm": true, "rope": true, "scheduler_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers", "system_prompt": "You will be given an image or its caption. Please describe the content of the image in detail in your own words.", "torch_dtype": "bfloat16", "transformers_version": "4.49.0.dev0", "unet_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers", "use_ema": false, "use_norm": false, "vae_downsample_f": 32, "vae_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers" }