Phillnet-2 / ImageGen /config.json
ayjays132's picture
Upload 478 files
101858b verified
{
  "conditioning_dim": 768,
  "max_condition_tokens": 256,
  "prefer_hidden_layer": -1,
  "use_native_embeddings": true,
  "norm_style": "rms_layer_adaptive",
  "enable_memory": true,
  "enable_steering": true,
  "memory_capacity": 128,
  "memory_top_k": 4,
  "memory_strength": 0.25,
  "use_high_fidelity_text_bridge": true,
  "bridge_dim": 768,
  "bridge_hidden_mult": 4,
  "bridge_gate_init": 0.0,
  "use_sdxl_conditioning_projector": true,
  "sdxl_token_dim": 2048,
  "sdxl_pooled_dim": 1280,
  "image_generator_class": "LightweightLatentImageGenerator",
  "image_generator_config": {
    "cond_dim": 768,
    "latent_channels": 4,
    "base_channels": 256,
    "diffusion_steps": 1000,
    "use_multiscale_refiner": true,
    "use_highfreq_head": true,
    "decoder_res_blocks": 0,
    "refiner_channels": 128,
    "use_attention_refiner": false,
    "generation_mode": "latent_diffusion",
    "vae_model_name_or_path": "models/Phillnet-2-SDXL-UNet-VAE",
    "vae_scale_factor": 0.13025,
    "decode_latents_on_generate": true,
    "latent_diffusion_channels": 256,
    "latent_diffusion_blocks": 2,
    "latent_diffusion_attention": false,
    "num_train_timesteps": 1000,
    "prediction_type": "epsilon",
    "default_inference_steps": 8,
    "denoiser_backbone": "multiscale_unet",
    "unet_base_channels": 192,
    "unet_res_blocks_per_stage": 2,
    "use_token_cross_attention": true,
    "cross_attention_heads": 8,
    "final_decode_mode": "unified",
    "final_rgb_blend": 0.35,
    "use_spatial_text_prior": true,
    "spatial_prior_hidden": 256,
    "spatial_prior_heads": 4,
    "spatial_prior_layers": 2,
    "spatial_prior_query_count": 256,
    "enable_quality_adapter": true,
    "quality_adapter_hidden": 64,
    "enable_visual_contract_adapter": true,
    "visual_contract_hidden": 64,
    "visual_contract_maps": 8,
    "enable_refiner_lora": true,
    "refiner_lora_rank": 16,
    "refiner_lora_hidden": 32,
    "enable_latent_refiner": true,
    "latent_refiner_hidden": 128,
    "enable_structure_prior": true,
    "structure_prior_hidden": 192,
    "structure_prior_seed_size": 16,
    "structure_prior_heads": 4,
    "use_pretrained_unet": true,
    "pretrained_unet_model_name_or_path": "models/Phillnet-2-SDXL-UNet-VAE"
  },
  "aligner_input_dims": [
    768,
    1024
  ],
  "use_qwen_text_refiner": true,
  "qwen_refiner_hidden": 1024,
  "qwen_refiner_intermediate": 3584,
  "qwen_refiner_layers": 16,
  "qwen_refiner_attention_indices": [
    3,
    7,
    11,
    15
  ],
  "qwen_refiner_weights": "models/qwen_aligned_refiner/deep_16.pt",
  "text_tokenizer_dir": "tokenizer",
  "use_vision_encoder": false,
  "vision_hidden_size": 768,
  "vision_target_dim": 1024,
  "image_processor_dir": null
}