DeMemWM / configurations / algorithm / dememwm_memory_dit.yaml

Clean DeMemWM deterministic memory slot handling

93d7b0a 3 days ago

2.68 kB


	# Composition list: `base_video_dit` is listed first, `_self_` second.
	# NOTE(review): this looks like a Hydra defaults list; if so, `_self_` coming
	# last means keys set in this file take precedence over `base_video_dit`.
	defaults:
	  - base_video_dit
	  - _self_

	# Identifier for this config; same as this file's base name.
	_name: dememwm_memory_dit

	# Standalone Memory-DiT path. Do not route through old SSM-memory config.
	# NOTE(review): the code that reads the five keys below is not visible from
	# this file, and neither is `base_video_dit`; whether these values override
	# inherited ones should be confirmed there.
	memory_token_cross_attention: true
	# null: no layer selection is given in this file; the fallback applied by the
	# consumer is not visible here.
	memory_cross_attn_layers: null
	memory_condition_length: 0
	pose_cond_dim: 5
	log_video: false

	# DeMemWM memory settings.
	# NOTE(review): in the copy this was taken from, every key was flattened to
	# one depth, so nine `enabled:` keys and two `compress:` keys collided as
	# siblings. Nesting is restored here from those repeated keys and from the
	# comments. Placements marked NOTE(review) are the ones the flattened text
	# could not settle; confirm them against the consuming code.
	# The one-tab prefix carried by every line of the copy is kept so this block
	# lines up with its neighbours. YAML forbids tab indentation, so strip that
	# prefix before loading.
	dememwm:
	  enabled: true
	  training_stage: stage_1  # fallback only when curriculum.enabled=false
	  debug_force_all_streams: false
	  curriculum:
	    enabled: true
	    full_stage_start_step: 60000
	  # NOTE(review): freeze_vae, dit_freeze and lr are placed directly under
	  # `dememwm`; the flattened text would equally allow them inside
	  # `curriculum` (and `lr` inside `dit_freeze`) — confirm.
	  freeze_vae: true
	  dit_freeze:
	    enabled: true
	  # Keep the mantissa-dot plus signed-exponent spelling (1.0e-4, not 1e-4):
	  # YAML 1.1 loaders such as PyYAML only resolve that form as a float.
	  lr:
	    dememwm_modules: 1.0e-4
	    memory_adapters: 1.0e-4
	    full_dit: 1.0e-5
	  # Current Conv2D memory projectors preserve latent H,W=(18,32).
	  # Pool sizes are resolved from projected spatial grid size and downsample
	  # ratios.
	  # NOTE(review): token_patch_size is assumed to sit at the `dememwm` level.
	  token_patch_size: 2
	  anchor:
	    enabled: true
	    anchor_indices: [0, 1, 2, 3]
	    allow_generated_as_anchor: false
	    diverse_selection: true
	    compress:
	      downsample_ratio: 4
	  dynamic:
	    enabled: true
	    exclude_latest_local_frames: 4
	    recent_frames: 8
	    conv_kernel_t: 3
	    conv_stride_t: 2
	  revisit:
	    enabled: true
	    deterministic_pose_retrieval: true
	    fov_overlap_threshold: 0.30
	    high_quality_fov_threshold: 0.70
	    plucker_weight: 0.10
	    max_frames: 2
	    # FoV geometry for coverage-based retrieval scoring.
	    # fov_half_h/v: half-angles (degrees) of the horizontal/vertical field of
	    # view.
	    # fov_radius: world-space radius of the sample sphere.
	    # fov_{yaw,pitch,depth}_samples: grid resolution for FoV point sampling.
	    fov_half_h: 52.5  # 105 deg total horizontal FoV
	    fov_half_v: 37.5  # 75 deg total vertical FoV
	    fov_radius: 30.0
	    fov_yaw_samples: 25
	    fov_pitch_samples: 20
	    fov_depth_samples: 20
	    pose_preselect_topk: 64
	    # Plucker descriptor grid for secondary pose-similarity scoring.
	    plucker_grid_h: 4
	    plucker_grid_w: 4
	    plucker_focal_length: 0.35
	    # Second `compress` mapping; nested under `revisit`, mirroring the one
	    # under `anchor`.
	    compress:
	      downsample_ratio: 4
	  # NOTE(review): eval_ablation and generated_history_proxy are placed as
	  # siblings of stage_policy; the flattened text would also allow them to be
	  # nested inside it — confirm.
	  stage_policy:
	    noise_bucket_logging: true
	  eval_ablation:
	    enabled: false
	    branch: A_plus_D_plus_R_normal
	  generated_history_proxy:
	    enabled: false
	    start_step: 0
	    ramp_steps: 1
	    max_prob: 0.0
	    noise_std: 0.25
	    dropout_prob: 0.0
	  injection:
	    dit_hidden_size: 1024
	    anchor_gate: 1.0
	    dynamic_gate: 1.0
	    revisit_gate: 1.0
	  cache:
	    enabled: true
	    device: cpu
	    keep_raw_latents: all
	    keep_compressed_records: true
	    keep_prefix_anchors: true
	    # `none` is a plain string in YAML, not null.
	    eviction_policy: none
	    no_evict: true
	    clear_between_videos: true
	    max_records: null
	    on_capacity_exceeded: warn
	  # NOTE(review): no blank line separates `checkpoint` from the keys above in
	  # the copy, so it is kept inside `dememwm`; confirm it is not top-level.
	  checkpoint:
	    strict_dememwm_eval_load: true

	# Nesting restored as diffusion -> architecture -> network_size. In the
	# flattened copy `diffusion:` and `architecture:` were empty keys sitting
	# beside `network_size`.
	diffusion:
	  architecture:
	    network_size: 64