{
  "_gradient_checkpointing": true,
  "architectures": [
    "TBACUniImage"
  ],
  "attn_implementation": null,
  "diffusion_model_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers",
  "in_channels": 32,
  "input_size": 16,
  "loss_type": "flow",
  "max_input_text_tokens": 256,
  "mllm_id": "Qwen/Qwen2.5-VL-3B-Instruct",
  "model_type": "tbacuniimage",
  "modules_to_freeze": [
    "vae",
    "model.mllm_backbone"
  ],
  "modules_to_unfreeze": [
    "model.mllm_backbone.model.embed_tokens"
  ],
  "noise_scheduler_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers",
  "num_metaqueries": 64,
  "scheduler_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers",
  "system_prompt": "You will be given an image or its caption. Please describe the content of the image in detail in your own words.",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "vae_downsample_f": 32,
  "vae_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers"
}