# Run identity and output locations for this experiment.
wandb_group: lvdm_cardiacnet  # W&B run group (runs are reported via `report_to: wandb` below)
output_dir: /nfs/usrhome/khmuhammad/Echodream/experiments/cardiacnet_with_both_cfg
vae_path: /nfs/usrhome/khmuhammad/Echodream/models/vae  # presumably the VAE that produced/decodes the latents — confirm against trainer
# Shared values referenced elsewhere in this file via ${globals.*} interpolation
# (OmegaConf-style — confirm the config loader supports it).
globals:
  target_fps: 32       # frame rate of training clips
  target_nframes: 64   # clip length in frames (64 frames @ 32 fps = 2 s)
  outputs:             # fields each dataset sample provides
    - video
    - class_id
    - image
# Dataset list; entries with `active: false` are presumably skipped — confirm in loader.
datasets:
  - name: CardiacNetLatent
    active: true
    params:
      root: /nfs/usrhome/khmuhammad/Echodream/data/latents/cardiacnet  # pre-encoded latents, not raw video
      target_fps: ${globals.target_fps}
      target_nframes: ${globals.target_nframes}
      target_resolution: 14   # latent spatial size; matches unet.sample_size
      outputs: ${globals.outputs}
# Denoising UNet configuration (diffusers UNetSpatioTemporalConditionModel).
unet:
  _class_name: UNetSpatioTemporalConditionModel
  addition_time_embed_dim: 1    # 1-D additional time embedding input
  block_out_channels:           # channel width per resolution stage
    - 128
    - 256
    - 256
    - 512
  cross_attention_dim: 1        # scalar cross-attention conditioning — presumably the class id; confirm
  down_block_types:
    - CrossAttnDownBlockSpatioTemporal
    - CrossAttnDownBlockSpatioTemporal
    - CrossAttnDownBlockSpatioTemporal
    - DownBlockSpatioTemporal
  in_channels: 8                # NOTE(review): 8 in vs 4 out — presumably noisy latent (4ch) concatenated with a conditioning image latent (4ch); confirm against trainer
  layers_per_block: 2
  num_attention_heads:          # heads per stage, parallel to block_out_channels
    - 8
    - 16
    - 16
    - 32
  num_frames: ${globals.target_nframes}
  out_channels: 4               # predicted latent channels per frame
  projection_class_embeddings_input_dim: 1
  sample_size: 14               # latent spatial resolution (matches datasets[].params.target_resolution)
  transformer_layers_per_block: 1
  up_block_types:               # mirror of down_block_types
    - UpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
# Training noise schedule (diffusers DDPMScheduler).
noise_scheduler:
  _class_name: DDPMScheduler
  num_train_timesteps: 1000
  beta_start: 0.0001
  beta_end: 0.02
  beta_schedule: linear
  variance_type: fixed_small
  clip_sample: true
  clip_sample_range: 4.0              # clips to [-4, 4] instead of the scheduler default [-1, 1] — latent-space range, presumably; confirm
  prediction_type: v_prediction       # model predicts v, not epsilon
  thresholding: false
  dynamic_thresholding_ratio: 0.995   # inert while thresholding is false
  sample_max_value: 1.0               # inert while thresholding is false
  timestep_spacing: leading
  steps_offset: 0
# ---- Training loop ----------------------------------------------------------
training_mode: diffusion
train_batch_size: 4
dataloader_num_workers: 16
max_train_steps: 100000
training_conditioning_type: class_id   # condition on class ids (not LVEF/text) during training
learning_rate: 1.0e-05
lr_warmup_steps: 500                   # NOTE(review): with `lr_scheduler: constant` warmup may be a no-op unless the code maps it to constant_with_warmup — confirm
scale_lr: false
lr_scheduler: constant
use_8bit_adam: false
gradient_accumulation_steps: 1
noise_offset: 0.1                      # strength of offset noise added during training
drop_conditionning: 0.1                # conditioning-dropout probability for CFG training. NOTE(review): key is misspelled ("conditionning") but must match the trainer's expected name — fix code and config together, not here
gradient_checkpointing: false
use_ema: true
enable_xformers_memory_efficient_attention: false
allow_tf32: true
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
logging_dir: logs
mixed_precision: fp16

# ---- Validation -------------------------------------------------------------
validation_timesteps: 128              # denoising steps for validation sampling
validation_fps: ${globals.target_fps}
validation_frames: ${globals.target_nframes}
validation_lvefs:                      # presumably unused when validation_conditioning_type is class_id — confirm
  - 0.0
  - 0.4
  - 0.7
  - 1.0
validation_class_ids:                  # NOTE(review): id 2 appears twice while the parallel lvefs/texts lists have 4 distinct values — confirm the duplicate is intentional
  - 0
  - 1
  - 2
  - 2
validation_texts:
  - '0.0'
  - '0.4'
  - '0.7'
  - '1.0'
num_validation_samples: 4
validation_guidance: 1.0
validation_steps: 1000                 # run validation every N steps
validation_conditioning_type: class_id

# ---- Logging / checkpointing ------------------------------------------------
report_to: wandb
checkpointing_steps: 1000
checkpoints_total_limit: 50
resume_from_checkpoint: null
tracker_project_name: echo-dream
seed: 42

# ---- Text encoder (likely inert under class_id conditioning — confirm) ------
text_encoder_path: openai/clip-vit-large-patch14
pretrained_model_name_or_path: openai/clip-vit-large-patch14
tokenizer_path: openai/clip-vit-large-patch14
train_text_encoder: false

# ---- Classifier-free guidance -----------------------------------------------
guidance_scale_class: 5.0              # guidance scale for the class condition
guidance_scale_frame: 1.0              # guidance scale for the frame condition
use_separate_guidance: true            # apply the two scales independently
num_train_epochs: 2858
|
|