{
  "unet_additional_kwargs": {
    "use_inflated_groupnorm": true,
    "unet_use_cross_frame_attention": false,
    "unet_use_temporal_attention": false,
    "use_motion_module": true,
    "motion_module_resolutions": [1, 2, 4, 8],
    "motion_module_mid_block": true,
    "motion_module_decoder_only": false,
    "motion_module_type": "Vanilla",
    "motion_module_kwargs": {
      "num_attention_heads": 8,
      "num_transformer_block": 1,
      "attention_block_types": ["Temporal_Self", "Temporal_Self"],
      "temporal_position_encoding": true,
      "temporal_position_encoding_max_len": 32,
      "temporal_attention_dim_div": 1
    }
  },
  "noise_scheduler_kwargs": {
    "num_train_timesteps": 1000,
    "beta_start": 0.00085,
    "beta_end": 0.012,
    "beta_schedule": "linear",
    "steps_offset": 1,
    "clip_sample": false
  }
}
|