{
  "_gradient_checkpointing": true,
  "architectures": [
    "MetaQuery"
  ],
  "connector_num_hidden_layers": 24,
  "diffusion_model_id": "black-forest-labs/FLUX.1-Kontext-dev",
  "eval_image_size": 512,
  "guidance_scale": 1.0,
  "in_channels": 16,
  "input_size": 64,
  "loss_type": "flow",
  "max_images": 1,
  "max_input_text_tokens": 256,
  "mllm_id": "Qwen/Qwen2.5-VL-3B-Instruct",
  "model_type": "metaquery",
  "modules_to_freeze": [
    "vae",
    "model.mllm_backbone",
    "model.transformer"
  ],
  "modules_to_unfreeze": [
    "model.mllm_backbone.model.embed_tokens"
  ],
  "noise_scheduler_id": "black-forest-labs/FLUX.1-Kontext-dev",
  "num_metaqueries": 0,
  "scheduler_id": "black-forest-labs/FLUX.1-Kontext-dev",
  "system_prompt": "You will be given an image or its caption. Please describe the content of the image in detail in your own words.",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "vae_downsample_f": 8,
  "vae_id": "black-forest-labs/FLUX.1-Kontext-dev"
}