{
  "_name_or_path": "morphablediffusion/config.json",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "attention_dropout": 0.0,
  "attention_window": 512,
  "bos_token_id": 0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "data": {
    "params": {
      "batch_size": 70,
      "data_dir": "/cluster/scratch/xiychen/data/facescape_color_calibrated",
      "mesh_topology": "flame",
      "num_workers": 1,
      "shuffled_expression": true
    },
    "target": "ldm.data.facescape.FaceScapeDataset"
  },
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "lightning": {
    "callbacks": {},
    "modelcheckpoint": {
      "params": {
        "every_n_train_steps": 2000
      }
    },
    "trainer": {
      "accumulate_grad_batches": 1,
      "benchmark": true,
      "check_val_every_n_epoch": null,
      "max_steps": 6000,
      "num_sanity_val_steps": 0,
      "precision": 32,
      "val_check_interval": 250
    }
  },
  "max_decoder_position_embeddings": 1024,
  "max_encoder_position_embeddings": 16384,
  "model": {
    "base_learning_rate": "5e-5",
    "params": {
      "batch_view_num": 4,
      "cfg_scale": 2.0,
      "clip_image_encoder_path": "./ckpt/ViT-L-14.pt",
      "drop_conditions": false,
      "finetune_unet": true,
      "image_size": 256,
      "output_num": 8,
      "projection": "perspective",
      "scheduler_config": {
        "params": {
          "cycle_lengths": [
            100000
          ],
          "f_max": [
            1.0
          ],
          "f_min": [
            1.0
          ],
          "f_start": [
            0.02
          ],
          "warm_up_steps": [
            100
          ]
        },
        "target": "ldm.lr_scheduler.LambdaLinearScheduler"
      },
      "target_elevation": 0,
      "unet_config": {
        "params": {
          "attention_resolutions": [
            4,
            2,
            1
          ],
          "channel_mult": [
            1,
            2,
            4,
            4
          ],
          "context_dim": 768,
          "image_size": 32,
          "in_channels": 8,
          "legacy": false,
          "model_channels": 320,
          "num_heads": 8,
          "num_res_blocks": 2,
          "out_channels": 4,
          "transformer_depth": 1,
          "use_checkpoint": true,
          "use_spatial_transformer": true,
          "volume_dims": [
            64,
            128,
            256,
            512
          ]
        },
        "target": "ldm.models.diffusion.attention.DepthWiseAttention"
      },
      "use_spatial_volume": false,
      "view_num": 16
    },
    "target": "ldm.models.diffusion.morphable_diffusion.SyncMultiviewDiffusion"
  },
  "model_type": "led",
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "transformers_version": "4.42.4",
  "use_cache": true,
  "vocab_size": 50265
}