{
  "network": {
    "data_size": 32,
    "data_dim": 512,
    "num_input_channels": 4,
    "num_latents": 256,
    "latents_dim": 1024,
    "label_dim": 2048,
    "num_cond_tokens": 77,
    "num_processing_layers": 4,
    "num_blocks": 4,
    "patch_size": 2,
    "read_write_heads": 16,
    "compute_heads": 32,
    "latent_mlp_multiplier": 4,
    "data_mlp_multiplier": 4,
    "compute_dropout": 0,
    "rw_stochastic_depth": 0,
    "compute_stochastic_depth": 0,
    "concat_cond_token_to_latents": false,
    "use_cond_rin_block": true,
    "num_text_registers": 16,
    "coherence_keys": [
      "clip_score",
      "aesthetic_score",
      "image_reward_score",
      "pick_a_score_score",
      "hpsv2_score",
      "vqa_score",
      "sciscore_score"
    ],
    "coherence_dropout": 0.0,
    "dropout_strategy": "binomial",
    "use_self_conditioning": true
  },
  "preconditioning": {
    "num_latents": 256,
    "latents_dim": 1024,
    "do_normalization": true,
    "sigma_data": 0.5,
    "do_gradnorm_reweighting": true,
    "logvar_channels": 128,
    "logvar_mlp_layers": 0
  },
  "data_preprocessing": {
    "input_key_mean": "vae_embeddings_mean_256",
    "input_key_std": "vae_embeddings_std_256",
    "output_key_root": "x_0",
    "vae_sample": true,
    "channel_wise_normalisation": true,
    "model_type": "sdxl"
  },
  "postprocessing": {
    "channel_wise_normalisation": true,
    "model_type": "sdxl"
  },
  "scheduler": {
    "start": 1,
    "end": 0,
    "clip_min": 1e-09
  },
  "coherence_keys": [
    "clip_score",
    "aesthetic_score",
    "image_reward_score",
    "pick_a_score_score",
    "hpsv2_score",
    "vqa_score",
    "sciscore_score"
  ],
  "sampler_defaults": {
    "num_steps": 50,
    "guidance_scale": 7.0,
    "sigma_data": 0.5
  },
  "data_resolution": 32,
  "img_resolution": 256,
  "max_text_len": 77,
  "model_type": "sdxl",
  "vae_repo": "stabilityai/sdxl-vae",
  "text_encoder_repo": "google/flan-t5-xl"
}