{
  "model": {
    "base_learning_rate": 5e-07,
    "target": "ldm.models.diffusion.ddpm.LatentDiffusion",
    "params": {
      "ckpt_path": null,
      "linear_start": 0.0015,
      "linear_end": 0.0195,
      "num_timesteps_cond": 1,
      "log_every_t": 200,
      "timesteps": 1000,
      "first_stage_key": "image",
      "image_size": 64,
      "channels": 3,
      "cond_stage_trainable": true,
      "cond_stage_key": "faceattr",
      "conditioning_key": "crossattn",
      "monitor": "val/loss_simple_ema",
      "unet_config": {
        "target": "ldm.modules.diffusionmodules.openaimodel.UNetModel",
        "params": {
          "image_size": 64,
          "in_channels": 3,
          "out_channels": 3,
          "model_channels": 224,
          "attention_resolutions": [8, 4, 2],
          "num_res_blocks": 2,
          "channel_mult": [1, 2, 3, 4],
          "num_head_channels": 32,
          "use_spatial_transformer": true,
          "transformer_depth": 1,
          "context_dim": 256
        }
      },
      "first_stage_config": {
        "target": "ldm.models.autoencoder.VQModelInterface",
        "params": {
          "embed_dim": 3,
          "n_embed": 8192,
          "ddconfig": {
            "double_z": false,
            "z_channels": 3,
            "resolution": 256,
            "in_channels": 3,
            "out_ch": 3,
            "ch": 128,
            "ch_mult": [1, 2, 4],
            "num_res_blocks": 2,
            "attn_resolutions": [],
            "dropout": 0.0
          },
          "lossconfig": {
            "target": "torch.nn.Identity"
          }
        }
      },
      "cond_stage_config": {
        "target": "ldm.modules.encoders.modules.FaceEmbedder",
        "params": {
          "lmk_dim": 256,
          "comb_mode": "stack",
          "keys": ["image", "landmark"],
          "attention": true,
          "merge_eyes": true,
          "face_model": "r100",
          "affine_crop": true
        }
      }
    }
  }
}