{
    "model": {
        "base_learning_rate": 5e-07,
        "target": "ldm.models.diffusion.ddpm.LatentDiffusion",
        "params": {
            "ckpt_path": null,
            "linear_start": 0.0015,
            "linear_end": 0.0195,
            "num_timesteps_cond": 1,
            "log_every_t": 200,
            "timesteps": 1000,
            "first_stage_key": "image",
            "image_size": 64,
            "channels": 3,
            "cond_stage_trainable": true,
            "cond_stage_key": "faceattr",
            "conditioning_key": "crossattn",
            "monitor": "val/loss_simple_ema",
            "unet_config": {
                "target": "ldm.modules.diffusionmodules.openaimodel.UNetModel",
                "params": {
                    "image_size": 64,
                    "in_channels": 3,
                    "out_channels": 3,
                    "model_channels": 224,
                    "attention_resolutions": [
                        8,
                        4,
                        2
                    ],
                    "num_res_blocks": 2,
                    "channel_mult": [
                        1,
                        2,
                        3,
                        4
                    ],
                    "num_head_channels": 32,
                    "use_spatial_transformer": true,
                    "transformer_depth": 1,
                    "context_dim": 256
                }
            },
            "first_stage_config": {
                "target": "ldm.models.autoencoder.VQModelInterface",
                "params": {
                    "embed_dim": 3,
                    "n_embed": 8192,
                    "ddconfig": {
                        "double_z": false,
                        "z_channels": 3,
                        "resolution": 256,
                        "in_channels": 3,
                        "out_ch": 3,
                        "ch": 128,
                        "ch_mult": [
                            1,
                            2,
                            4
                        ],
                        "num_res_blocks": 2,
                        "attn_resolutions": [],
                        "dropout": 0.0
                    },
                    "lossconfig": {
                        "target": "torch.nn.Identity"
                    }
                }
            },
            "cond_stage_config": {
                "target": "ldm.modules.encoders.modules.FaceEmbedder",
                "params": {
                    "lmk_dim": 256,
                    "comb_mode": "stack",
                    "keys": [
                        "image",
                        "landmark"
                    ],
                    "attention": true,
                    "merge_eyes": true,
                    "face_model": "r100",
                    "affine_crop": true
                }
            }
        }
    }
}