{ "network": { "data_size": 32, "data_dim": 512, "num_input_channels": 4, "num_latents": 256, "latents_dim": 1024, "label_dim": 2048, "num_cond_tokens": 77, "num_processing_layers": 4, "num_blocks": 4, "patch_size": 2, "read_write_heads": 16, "compute_heads": 32, "latent_mlp_multiplier": 4, "data_mlp_multiplier": 4, "compute_dropout": 0, "rw_stochastic_depth": 0, "compute_stochastic_depth": 0, "concat_cond_token_to_latents": false, "use_cond_rin_block": true, "num_text_registers": 16, "coherence_keys": [ "clip_score", "aesthetic_score", "image_reward_score", "pick_a_score_score", "hpsv2_score", "vqa_score", "sciscore_score" ], "coherence_dropout": 0.0, "dropout_strategy": "binomial", "use_self_conditioning": true }, "preconditioning": { "num_latents": 256, "latents_dim": 1024, "do_normalization": true, "sigma_data": 0.5, "do_gradnorm_reweighting": true, "logvar_channels": 128, "logvar_mlp_layers": 0 }, "data_preprocessing": { "input_key_mean": "vae_embeddings_mean_256", "input_key_std": "vae_embeddings_std_256", "output_key_root": "x_0", "vae_sample": true, "channel_wise_normalisation": true, "model_type": "sdxl" }, "postprocessing": { "channel_wise_normalisation": true, "model_type": "sdxl" }, "scheduler": { "start": 1, "end": 0, "clip_min": 1e-09 }, "coherence_keys": [ "clip_score", "aesthetic_score", "image_reward_score", "pick_a_score_score", "hpsv2_score", "vqa_score", "sciscore_score" ], "sampler_defaults": { "num_steps": 50, "guidance_scale": 7.0, "sigma_data": 0.5 }, "data_resolution": 32, "img_resolution": 256, "max_text_len": 77, "model_type": "sdxl", "vae_repo": "stabilityai/sdxl-vae", "text_encoder_repo": "google/flan-t5-xl" }