---
# Configuration with seven top-level sections: unets, imagen, trainer,
# dataset, dataloader, wandb and checkpoint.
# NOTE(review): the nesting was reconstructed from key order and repeated
# key names (the previous text carried no indentation) — confirm against
# the code that loads this file.

unets:
  # dim_mults and layer_cross_attns each have three entries — presumably one
  # per resolution level of the U-Net; TODO confirm.
  unet1:
    dim: 64
    num_resnet_blocks: 2
    dim_mults:
      - 1
      - 2
      - 4
    max_text_len: 1
    layer_attns: false
    layer_cross_attns:
      - false
      - false
      - true
    cond_images_channels: 3
  # Same settings as unet1, plus memory_efficient: true.
  unet2:
    dim: 64
    num_resnet_blocks: 2
    dim_mults:
      - 1
      - 2
      - 4
    max_text_len: 1
    layer_attns: false
    layer_cross_attns:
      - false
      - false
      - true
    memory_efficient: true
    cond_images_channels: 3

imagen:
  elucidated: true
  condition_on_text: true
  # The two-entry lists in this section line up with the two entries under
  # `unets` — presumably one value per U-Net, in order; TODO confirm.
  image_sizes:
    - 56
    - 112
  text_embed_dim: 1
  num_sample_steps:
    - 32
    - 64
  random_crop_sizes:
    - null
    - 56
  temporal_downsample_factor:
    - 4
    - 1
  sigma_min: 0.002
  sigma_max: 80
  sigma_data: 0.25
  rho: 7
  P_mean: -1.2
  P_std: 1.2
  S_churn:
    - 80
    - 160
  S_tmin: 0.05
  S_tmax: 50
  S_noise: 1.003
  resize_mode: trilinear

trainer:
  split_batches: false
  lr: 0.0005
  dl_tuple_output_keywords_names:
    - images
    - text_embeds
    - cond_images

dataset:
  # Placeholder path — replace with the local dataset location.
  data_path: /path/to/EchoNet-Dynamic
  deactivate_cache: false
  fps: 32
  duration: 2.0
  grayscale: false

dataloader:
  batch_size: 8
  num_workers: 8

wandb:
  project: EchoDiffusion

checkpoint:
  # Placeholder path — replace with the local checkpoint directory.
  path: /path/to/checkpoints
  # Distinct from dataloader.batch_size (8).
  batch_size: 4
  cond_scale: 5.0
  save_every_x_it: 5000
|