{ "_gradient_checkpointing": true, "architectures": [ "MetaQuery" ], "connector_num_hidden_layers": 24, "diffusion_model_id": "black-forest-labs/FLUX.1-Kontext-dev", "eval_image_size": 512, "guidance_scale": 1.0, "in_channels": 16, "input_size": 64, "loss_type": "flow", "max_images": 1, "max_input_text_tokens": 256, "mllm_id": "Qwen/Qwen2.5-VL-3B-Instruct", "model_type": "metaquery", "modules_to_freeze": [ "vae", "model.mllm_backbone", "model.transformer" ], "modules_to_unfreeze": [ "model.mllm_backbone.model.embed_tokens" ], "noise_scheduler_id": "black-forest-labs/FLUX.1-Kontext-dev", "num_metaqueries": 0, "scheduler_id": "black-forest-labs/FLUX.1-Kontext-dev", "system_prompt": "You will be given an image or its caption. Please describe the content of the image in detail in your own words.", "torch_dtype": "bfloat16", "transformers_version": "4.49.0", "vae_downsample_f": 8, "vae_id": "black-forest-labs/FLUX.1-Kontext-dev" }