{ "conditioning_dim": 768, "max_condition_tokens": 256, "prefer_hidden_layer": -1, "use_native_embeddings": true, "norm_style": "rms_layer_adaptive", "enable_memory": true, "enable_steering": true, "memory_capacity": 128, "memory_top_k": 4, "memory_strength": 0.25, "use_high_fidelity_text_bridge": true, "bridge_dim": 768, "bridge_hidden_mult": 4, "bridge_gate_init": 0.0, "use_sdxl_conditioning_projector": true, "sdxl_token_dim": 2048, "sdxl_pooled_dim": 1280, "image_generator_class": "LightweightLatentImageGenerator", "image_generator_config": { "cond_dim": 768, "latent_channels": 4, "base_channels": 256, "diffusion_steps": 1000, "use_multiscale_refiner": true, "use_highfreq_head": true, "decoder_res_blocks": 0, "refiner_channels": 128, "use_attention_refiner": false, "generation_mode": "latent_diffusion", "vae_model_name_or_path": "models/Phillnet-2-SDXL-UNet-VAE", "vae_scale_factor": 0.13025, "decode_latents_on_generate": true, "latent_diffusion_channels": 256, "latent_diffusion_blocks": 2, "latent_diffusion_attention": false, "num_train_timesteps": 1000, "prediction_type": "epsilon", "default_inference_steps": 8, "denoiser_backbone": "multiscale_unet", "unet_base_channels": 192, "unet_res_blocks_per_stage": 2, "use_token_cross_attention": true, "cross_attention_heads": 8, "final_decode_mode": "unified", "final_rgb_blend": 0.35, "use_spatial_text_prior": true, "spatial_prior_hidden": 256, "spatial_prior_heads": 4, "spatial_prior_layers": 2, "spatial_prior_query_count": 256, "enable_quality_adapter": true, "quality_adapter_hidden": 64, "enable_visual_contract_adapter": true, "visual_contract_hidden": 64, "visual_contract_maps": 8, "enable_refiner_lora": true, "refiner_lora_rank": 16, "refiner_lora_hidden": 32, "enable_latent_refiner": true, "latent_refiner_hidden": 128, "enable_structure_prior": true, "structure_prior_hidden": 192, "structure_prior_seed_size": 16, "structure_prior_heads": 4, "use_pretrained_unet": true, "pretrained_unet_model_name_or_path": "models/Phillnet-2-SDXL-UNet-VAE" }, "aligner_input_dims": [ 768, 1024 ], "use_qwen_text_refiner": true, "qwen_refiner_hidden": 1024, "qwen_refiner_intermediate": 3584, "qwen_refiner_layers": 16, "qwen_refiner_attention_indices": [ 3, 7, 11, 15 ], "qwen_refiner_weights": "models/qwen_aligned_refiner/deep_16.pt", "text_tokenizer_dir": "tokenizer", "use_vision_encoder": false, "vision_hidden_size": 768, "vision_target_dim": 1024, "image_processor_dir": null }