# NOTE(review): block structure below was reconstructed from a source whose
# line breaks and indentation had been collapsed. Key names, values, and
# original comments are unchanged; confirm nesting where flagged.
defaults:
  - base_video_dit
  - _self_

_name: dememwm_memory_dit

# Standalone Memory-DiT path. Do not route through old SSM-memory config.
memory_token_cross_attention: true
memory_cross_attn_layers: null
memory_condition_length: 0
pose_cond_dim: 5
log_video: false

dememwm:
  enabled: true
  training_stage: stage_1  # fallback only when curriculum.enabled=false
  debug_force_all_streams: false

  curriculum:
    enabled: true
    full_stage_start_step: 60000

  # NOTE(review): freeze_vae / dit_freeze / lr are assumed to be siblings of
  # `curriculum`; confirm they are not meant to be nested inside it.
  freeze_vae: true
  dit_freeze:
    enabled: true
  lr:
    dememwm_modules: 1.0e-4
    memory_adapters: 1.0e-4
    full_dit: 1.0e-5

  # Current Conv2D memory projectors preserve latent H,W=(18,32).
  # Pool sizes are resolved from projected spatial grid size and
  # downsample ratios.
  token_patch_size: 2

  anchor:
    enabled: true
    anchor_indices: [0, 1, 2, 3]
    allow_generated_as_anchor: false
    diverse_selection: true
    # NOTE(review): assumed to be the anchor stream's own compress block.
    compress:
      downsample_ratio: 4

  dynamic:
    enabled: true
    exclude_latest_local_frames: 4
    recent_frames: 8
    conv_kernel_t: 3
    conv_stride_t: 2

  revisit:
    enabled: true
    deterministic_pose_retrieval: true
    fov_overlap_threshold: 0.30
    high_quality_fov_threshold: 0.70
    plucker_weight: 0.10
    max_frames: 2
    # FoV geometry for coverage-based retrieval scoring.
    # fov_half_h/v: half-angles (degrees) of the horizontal/vertical field of view.
    # fov_radius: world-space radius of the sample sphere.
    # fov_{yaw,pitch,depth}_samples: grid resolution for FoV point sampling.
    fov_half_h: 52.5  # 105 deg total horizontal FoV
    fov_half_v: 37.5  # 75 deg total vertical FoV
    fov_radius: 30.0
    fov_yaw_samples: 25
    fov_pitch_samples: 20
    fov_depth_samples: 20
    pose_preselect_topk: 64
    # Plucker descriptor grid for secondary pose-similarity scoring.
    plucker_grid_h: 4
    plucker_grid_w: 4
    plucker_focal_length: 0.35
    # NOTE(review): assumed to be the revisit stream's own compress block.
    compress:
      downsample_ratio: 4

  stage_policy:
    noise_bucket_logging: true

  eval_ablation:
    enabled: false
    branch: A_plus_D_plus_R_normal

  generated_history_proxy:
    enabled: false
    start_step: 0
    ramp_steps: 1
    max_prob: 0.0
    noise_std: 0.25
    dropout_prob: 0.0

  injection:
    dit_hidden_size: 1024
    anchor_gate: 1.0
    dynamic_gate: 1.0
    revisit_gate: 1.0

  cache:
    enabled: true
    device: cpu
    keep_raw_latents: all
    keep_compressed_records: true
    keep_prefix_anchors: true
    eviction_policy: none
    no_evict: true
    clear_between_videos: true
    max_records: null
    on_capacity_exceeded: warn

  checkpoint:
    strict_dememwm_eval_load: true

# NOTE(review): `diffusion` is assumed to be a top-level override of the base
# config rather than a child of `dememwm`; confirm against base_video_dit.
diffusion:
  architecture:
    network_size: 64