{
"linear_start": 0.0015,
"linear_end": 0.0295,
"num_timesteps_cond": 1,
"log_every_t": 200,
"timesteps": 1000,
"first_stage_key": "target_img",
"cond_stage_key": "table, prev_img",
"image_size": 64,
"channels": 3,
"cond_stage_trainable": true,
"conditioning_key": "crossattn",
"monitor": "val/loss_simple_ema",
"scale_factor": 0.18215,
"use_ema": true,
"load_ema": false,
"unet_config": {
"target": "cheff.ldm.modules.diffusionmodules.openaimodel.UNetModel",
"params": {
"image_size": 64,
"in_channels": 3,
"out_channels": 3,
"model_channels": 224,
"attention_resolutions": [
8,
4,
2
],
"num_res_blocks": 2,
"channel_mult": [
1,
2,
4,
4
],
"num_heads": 8,
"use_spatial_transformer": true,
"transformer_depth": 1,
"context_dim": 768,
"use_checkpoint": true,
"legacy": false
}
},
"first_stage_config": {
"target": "cheff.ldm.models.autoencoder.AutoencoderKL",
"params": {
"embed_dim": 3,
"ckpt_path": null,
"ddconfig": {
"double_z": true,
"z_channels": 3,
"resolution": 256,
"in_channels": 3,
"out_ch": 3,
"ch": 128,
"ch_mult": [
1,
2,
4
],
"num_res_blocks": 2,
"attn_resolutions": [],
"dropout": 0.0
},
"lossconfig": {
"target": "torch.nn.Identity"
}
}
},
"cond_stage_config": {
"target": "cheff.ldm.modules.encoders.modules.MultiModalTransformerAdaptor",
"params": {
"autoencoder_config": {
"embed_dim": 3,
"ckpt_path": null,
"ddconfig": {
"double_z": true,
"z_channels": 3,
"resolution": 256,
"in_channels": 3,
"out_ch": 3,
"ch": 128,
"ch_mult": [
1,
2,
4
],
"num_res_blocks": 2,
"attn_resolutions": [],
"dropout": 0.0
},
"lossconfig": {
"target": "torch.nn.Identity"
}
},
"clip_visual_enc_config": {
"input_resolution": 256,
"layers": 12,
"width": 768,
"patch_size": 32,
"heads": 12
},
"clip_enc_checkpoint": null,
"context_dim": 768,
"condition_feat_dim": 1024,
"clip_trainable": true
}
}
}