{
  "linear_start": 0.0015,
  "linear_end": 0.0295,
  "num_timesteps_cond": 1,
  "log_every_t": 200,
  "timesteps": 1000,
  "first_stage_key": "target_img",
  "cond_stage_key": "table, prev_img",
  "image_size": 64,
  "channels": 3,
  "cond_stage_trainable": true,
  "conditioning_key": "crossattn",
  "monitor": "val/loss_simple_ema",
  "scale_factor": 0.18215,
  "use_ema": true,
  "load_ema": false,
  "unet_config": {
    "target": "cheff.ldm.modules.diffusionmodules.openaimodel.UNetModel",
    "params": {
      "image_size": 64,
      "in_channels": 3,
      "out_channels": 3,
      "model_channels": 224,
      "attention_resolutions": [8, 4, 2],
      "num_res_blocks": 2,
      "channel_mult": [1, 2, 4, 4],
      "num_heads": 8,
      "use_spatial_transformer": true,
      "transformer_depth": 1,
      "context_dim": 768,
      "use_checkpoint": true,
      "legacy": false
    }
  },
  "first_stage_config": {
    "target": "cheff.ldm.models.autoencoder.AutoencoderKL",
    "params": {
      "embed_dim": 3,
      "ckpt_path": null,
      "ddconfig": {
        "double_z": true,
        "z_channels": 3,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 2, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [],
        "dropout": 0.0
      },
      "lossconfig": {
        "target": "torch.nn.Identity"
      }
    }
  },
  "cond_stage_config": {
    "target": "cheff.ldm.modules.encoders.modules.MultiModalTransformerAdaptor",
    "params": {
      "autoencoder_config": {
        "embed_dim": 3,
        "ckpt_path": null,
        "ddconfig": {
          "double_z": true,
          "z_channels": 3,
          "resolution": 256,
          "in_channels": 3,
          "out_ch": 3,
          "ch": 128,
          "ch_mult": [1, 2, 4],
          "num_res_blocks": 2,
          "attn_resolutions": [],
          "dropout": 0.0
        },
        "lossconfig": {
          "target": "torch.nn.Identity"
        }
      },
      "clip_visual_enc_config": {
        "input_resolution": 256,
        "layers": 12,
        "width": 768,
        "patch_size": 32,
        "heads": 12
      },
      "clip_enc_checkpoint": null,
      "context_dim": 768,
      "condition_feat_dim": 1024,
      "clip_trainable": true
    }
  }
}