| n_tokens: 3 |
| pose_cond_dim: 5 |
| use_plucker: true |
| focal_length: 0.35 |
| customized_validation: true |
| memory_condition_length: 8 |
| log_video: true |
| relative_embedding: true |
| state_embed_only_on_qk: true |
| use_domain_adapter: false |
| use_memory_attention: true |
| add_timestamp_embedding: true |
| use_pose_prediction: true |
| require_pose_prediction: true |
| is_interactive: true |
| diffusion: |
| sampling_timesteps: 20 |
| beta_schedule: sigmoid |
| objective: pred_v |
| use_fused_snr: True |
| cum_snr_decay: 0.96 |
| clip_noise: 20. |
| ddim_sampling_eta: 0.0 |
| stabilization_level: 15 |
| schedule_fn_kwargs: {} |
| use_snr: False |
| use_cum_snr: False |
| snr_clip: 5.0 |
| timesteps: 1000 |
| |
| architecture: |
| network_size: 64 |
| attn_heads: 4 |
| attn_dim_head: 64 |
| dim_mults: [1, 2, 4, 8] |
| resolution: ${dataset.resolution} |
| attn_resolutions: [16, 32, 64, 128] |
| use_init_temporal_attn: True |
| use_linear_attn: True |
| time_emb_type: rotary |
|
|
| weight_decay: 2e-3 |
| warmup_steps: 10000 |
| optimizer_beta: [0.9, 0.99] |
| action_cond_dim: 25 |
| n_frames: 8 |
| frame_skip: 1 |
| frame_stack: 1 |
| uncertainty_scale: 1 |
| guidance_scale: 0.0 |
| chunk_size: 1 |
| scheduling_matrix: full_sequence |
| noise_level: random_all |
| causal: True |
| x_shape: [3, 360, 640] |
| context_frames: 1 |
| diffusion_path: zeqixiao/worldmem_checkpoints/diffusion_only.ckpt |
| vae_path: zeqixiao/worldmem_checkpoints/vae_only.ckpt |
| pose_predictor_path: zeqixiao/worldmem_checkpoints/pose_prediction_model_only.ckpt |
| next_frame_length: 1 |
|
|