Clean DeMemWM deterministic memory slot handling

Browse files

Files changed (15) hide show

algorithms/worldmem/dememwm/algorithm.py +70 -49
algorithms/worldmem/dememwm/cache.py +1 -6
algorithms/worldmem/dememwm/diagnostics.py +0 -2
algorithms/worldmem/dememwm/memory.py +6 -14
algorithms/worldmem/dememwm/retrieval.py +0 -1
configurations/algorithm/dememwm_memory_dit.yaml +0 -1
scripts/dememwm_full_eval.slurm +0 -1
scripts/dememwm_full_train.slurm +1 -3
tests/test_dememwm_config_static.py +11 -7
tests/test_dememwm_memory.py +37 -3
tests/test_dememwm_noise_bucket.py +0 -1
tests/test_dememwm_preselection.py +69 -0
tests/test_dememwm_retrieval.py +0 -5
tests/test_dememwm_stream_grad.py +1 -1
train_dememwm_full_berzelius.sh +5 -7

algorithms/worldmem/dememwm/algorithm.py CHANGED Viewed

@@ -42,7 +42,6 @@ class MemoryDiTMixin:
         "revisit_pose_preselect_selected_count",
         "revisit_exact_fov_candidate_count",
         "valid_revisit_frame_count",
-        "valid_revisit_target_count",
         "no_valid_revisit_count",
         "revisit_selected_frame_count",
         "revisit_frame_fov_overlap_mean",
@@ -171,7 +170,7 @@ class MemoryDiTMixin:
             if self._cfg_has(memory_cfg, name)
         ]
         if ratio_fields:
-            raise ValueError(f"standalone DeMemWM uses fixed manual token budgets, not ratio fields: {ratio_fields}")
         anchor_cfg = self._cfg_get(memory_cfg, "anchor", None)
         dynamic_cfg = self._cfg_get(memory_cfg, "dynamic", None)
@@ -247,7 +246,6 @@ class MemoryDiTMixin:
             "revisit_deterministic_fov_plucker_retrieval": True,
             "revisit_local_context_exclusion_frames": self._local_context_exclusion_frames(),
             "revisit_fov_overlap_threshold": -1.0 if fov_overlap_threshold is None else fov_overlap_threshold,
-            "revisit_high_quality_fov_threshold": high_quality_fov_threshold,
             "revisit_plucker_weight": plucker_weight,
             "stage_policy_noise_bucket_logging": True,
         }
@@ -764,13 +762,28 @@ class MemoryDiTMixin:
         if source_positions.numel() <= num_anchors or pose is None:
             return source_positions[:num_anchors]
         poses = pose.float()
-        selected = [0]
-        dists = torch.cdist(poses[0:1], poses).squeeze(0)
-        for _ in range(num_anchors - 1):
             farthest = int(dists.argmax().item())
             selected.append(farthest)
-            d_new = torch.cdist(poses[farthest:farthest + 1], poses).squeeze(0)
             dists = torch.minimum(dists, d_new)
         return source_positions[torch.tensor(sorted(selected), device=source_positions.device)]
     def _build_streaming_cache_records(
@@ -867,10 +880,14 @@ class MemoryDiTMixin:
             source_positions = torch.nonzero(non_generated, as_tuple=False).flatten()
             if source_positions.numel() > 0:
                 if anchor_diverse:
-                    anchor_pose = _pose_subset(source_positions, batch_idx)
-                    selected_anchor_positions = self._select_diverse_anchor_positions(
-                        source_positions, anchor_pose, len(anchor_indices)
-                    )
                 else:
                     selected_list = []
                     for anchor_idx in anchor_indices:
@@ -1339,28 +1356,36 @@ class MemoryDiTMixin:
         }
         return anchor_banks, revisit_banks, tokens_per_frame, diagnostics
     def _records_to_stream(
         self,
         records,
-        max_tokens: int,
         hidden_size: int,
         device: torch.device,
         dtype: torch.dtype,
     ) -> tuple[torch.Tensor, torch.Tensor, int]:
-        max_tokens = max(0, int(max_tokens))
         record_list = list(records)
-        stacked_tokens, stacked_mask = stack_record_tokens(record_list, max_slots=max_tokens)
         max_source_frame = max((int(record.max_source_frame) for record in record_list), default=-1)
-        if stacked_tokens is None or stacked_mask is None or max_tokens == 0:
-            tokens = torch.zeros((max_tokens, hidden_size), device=device, dtype=dtype)
-            mask = torch.zeros((max_tokens,), device=device, dtype=torch.bool)
             return tokens, mask, max_source_frame
-        n = min(max_tokens, stacked_tokens.shape[0])
         filled = stacked_tokens[:n].to(device=device, dtype=dtype)
         filled_mask = stacked_mask[:n].to(device=device, dtype=torch.bool)
-        if n < max_tokens:
-            pad = filled.new_zeros(max_tokens - n, hidden_size)
-            pad_mask = torch.zeros(max_tokens - n, device=device, dtype=torch.bool)
             tokens = torch.cat([filled, pad], dim=0)
             mask = torch.cat([filled_mask, pad_mask], dim=0)
         else:
@@ -1520,10 +1545,10 @@ class MemoryDiTMixin:
         revisit_pool_h, revisit_pool_w = self._resolve_spatial_pool_size(
             revisit_compress_cfg, revisit_src_h, revisit_src_w, 5, 8
         )
-        revisit_max_tokens = revisit_max_frames * revisit_pool_h * revisit_pool_w
         recent_frames = int(self._cfg_get(dynamic_cfg, "recent_frames", 8))
-        exclude_latest_local_frames = int(self._cfg_get(dynamic_cfg, "exclude_latest_local_frames", 4))
-        local_context_exclusion_frames = self._local_context_exclusion_frames()
         fov_overlap_threshold = self._cfg_get(revisit_cfg, "fov_overlap_threshold", 0.30)
         high_quality_fov_threshold = float(self._cfg_get(revisit_cfg, "high_quality_fov_threshold", 0.70))
         plucker_weight = float(self._cfg_get(revisit_cfg, "plucker_weight", 0.10))
@@ -1568,7 +1593,7 @@ class MemoryDiTMixin:
                 dtype=stream_dtype,
                 max_recent_frames=recent_frames,
                 target_frame_indices=target_frame_indices,
-                exclude_latest_local_frames=exclude_latest_local_frames,
             )
             if raw_latents is not None:
                 dynamic_latents = raw_latents
@@ -1631,7 +1656,7 @@ class MemoryDiTMixin:
                 revisit_pool_h,
                 revisit_pool_w,
                 revisit_max_frames,
-                local_context_exclusion_frames,
                 fov_overlap_threshold,
                 plucker_weight,
                 revisit_retrieval_kwargs,
@@ -1641,7 +1666,7 @@ class MemoryDiTMixin:
         T_tgt = target_frame_indices.shape[0]
         anchor_slots = max(0, anchor_num_tokens)
-        revisit_slots = max(0, revisit_max_tokens)
         anchor_source_type = None if allow_generated_anchor else MemorySourceType.PREFIX_GT
         anchor_include_generated = allow_generated_anchor
         anchor_token_rows = []
@@ -1659,7 +1684,6 @@ class MemoryDiTMixin:
                         source_type=anchor_source_type,
                         include_generated=anchor_include_generated,
                         max_records=len(anchor_indices),
-                        max_slots=anchor_slots,
                     )
                 )
                 anchor_bank.assert_causal(target_frame, records)
@@ -1693,7 +1717,7 @@ class MemoryDiTMixin:
                 "dynamic_min_gap_to_target_per_target": torch.full((B, T_tgt), -1, dtype=torch.long, device=stream_device),
                 "dynamic_max_gap_to_target_per_target": torch.full((B, T_tgt), -1, dtype=torch.long, device=stream_device),
                 "dynamic_overlap_with_c_short_count_per_target": torch.zeros((B, T_tgt), dtype=torch.long, device=stream_device),
-                "dynamic_exclude_latest_local_frames": exclude_latest_local_frames,
             }
         else:
             # Pre-select dynamic source frame positions using only frame index metadata
@@ -1705,7 +1729,7 @@ class MemoryDiTMixin:
             for _b in range(B):
                 for _j in range(T_tgt):
                     _target = int(target_frame_indices[_j, _b].item())
-                    _valid = (_dfi[:, _b] < _target - exclude_latest_local_frames).nonzero(as_tuple=False).flatten()
                     _needed.extend(_valid[-_max_src:].tolist())
             if _needed:
                 _needed_idx = torch.tensor(sorted(set(_needed)), device=stream_device, dtype=torch.long)
@@ -1727,7 +1751,7 @@ class MemoryDiTMixin:
                 _dynamic_pose_small,
                 target_frame_indices,
                 _dynamic_gen_small,
-                exclude_latest_local_frames=exclude_latest_local_frames,
             )
         dynamic_min_gap_tensor = torch.as_tensor(
@@ -1785,7 +1809,6 @@ class MemoryDiTMixin:
         revisit_best_selected_fov_overlap = torch.zeros((B, T_tgt), device=stream_device, dtype=torch.float32)
         revisit_best_selected_plucker_overlap = torch.zeros((B, T_tgt), device=stream_device, dtype=torch.float32)
         revisit_selected_gap_frames = torch.full((B, T_tgt), -1.0, device=stream_device, dtype=torch.float32)
-        valid_revisit_target_mask = torch.zeros((B, T_tgt), device=stream_device, dtype=torch.bool)
         eval_corrupted_revisit_mask = torch.zeros((B, T_tgt), device=stream_device, dtype=torch.bool)
         revisit_causal_max = torch.full((B, T_tgt), -1, device=stream_device, dtype=torch.long)
         eval_corruption_enabled = bool(eval_ablation_enabled and eval_ablation_branch in EVAL_CORRUPTION_BRANCHES)
@@ -1801,7 +1824,10 @@ class MemoryDiTMixin:
             for target_idx in range(T_tgt):
                 target_frame = int(target_frame_indices[target_idx, batch_idx].item())
                 if use_cache_revisit_records:
-                    candidate_records = list(revisit_record_batches[batch_idx])
                 else:
                     candidate_records = revisit_bank.query(
                         MemoryBankQuery(
@@ -1815,7 +1841,7 @@ class MemoryDiTMixin:
                     target_pose=_target_tensor_or_none(target_pose_source, batch_idx, target_idx),
                     target_summary=None,
                     topk=revisit_max_frames,
-                    exclude_local_context_frames=local_context_exclusion_frames,
                     fov_overlap_threshold=fov_overlap_threshold,
                     plucker_weight=plucker_weight,
                     target_video_id=_target_video_id_or_none(batch_idx, target_idx),
@@ -1840,7 +1866,6 @@ class MemoryDiTMixin:
                 revisit_best_selected_fov_overlap[batch_idx, target_idx] = float(result.diagnostics.get("best_selected_fov_overlap", 0.0))
                 revisit_best_selected_plucker_overlap[batch_idx, target_idx] = float(result.diagnostics.get("best_selected_plucker_overlap", 0.0))
                 revisit_selected_gap_frames[batch_idx, target_idx] = float(result.diagnostics.get("best_selected_gap_frames", -1))
-                valid_revisit_target_mask[batch_idx, target_idx] = bool(result.diagnostics.get("valid_revisit_target_count", 0))
                 revisit_bank.assert_causal(target_frame, selected_records)
                 if selected_records:
                     valid_revisit_mask[batch_idx, target_idx] = True
@@ -1860,10 +1885,9 @@ class MemoryDiTMixin:
                         target_frame=target_frame,
                     )
                     eval_corrupted_revisit_mask[batch_idx, target_idx] = bool(was_corrupted)
-                actual_max_source_frame = max((int(record.max_source_frame) for record in selected_records), default=max_source_frame)
                 batch_token_rows.append(stream_tokens)
                 batch_mask_rows.append(stream_mask)
-                batch_max_rows.append(torch.as_tensor(actual_max_source_frame, device=stream_device, dtype=torch.long))
             revisit_token_rows.append(torch.stack(batch_token_rows, dim=0))
             revisit_mask_rows.append(torch.stack(batch_mask_rows, dim=0))
             revisit_max_rows.append(torch.stack(batch_max_rows, dim=0))
@@ -1872,15 +1896,15 @@ class MemoryDiTMixin:
         revisit_max = torch.stack(revisit_max_rows, dim=0)
         if anchor_tokens.shape[-2] != anchor_num_tokens:
-            raise AssertionError(f"anchor token budget mismatch: got {anchor_tokens.shape[-2]}, expected {anchor_num_tokens}")
         if dynamic_latents is not None and dynamic_latents.shape[0] > 0:
             _expected_dyn = self.dememwm_dynamic_compressor.tokens_per_target(
                 int(dynamic_latents.shape[-2]), int(dynamic_latents.shape[-1])
             )
             if dynamic_tokens.shape[-2] != _expected_dyn:
-                raise AssertionError(f"dynamic token budget mismatch: got {dynamic_tokens.shape[-2]}, expected {_expected_dyn}")
-        if revisit_tokens.shape[-2] > revisit_max_tokens:
-            raise AssertionError(f"revisit token cap exceeded: got {revisit_tokens.shape[-2]}, cap {revisit_max_tokens}")
         anchor_gate = gates.anchor_gate if anchor_effective_enabled else 0.0
         dynamic_gate = gates.dynamic_gate if dynamic_effective_enabled else 0.0
         gate_module = getattr(self, "dememwm_revisit_gate", None)
@@ -1913,7 +1937,6 @@ class MemoryDiTMixin:
             revisit_best_selected_fov_overlap = torch.zeros_like(revisit_best_selected_fov_overlap)
             revisit_best_selected_plucker_overlap = torch.zeros_like(revisit_best_selected_plucker_overlap)
             revisit_selected_gap_frames = torch.full_like(revisit_selected_gap_frames, -1.0)
-            valid_revisit_target_mask = torch.zeros_like(valid_revisit_target_mask)
             eval_corrupted_revisit_mask = torch.zeros_like(eval_corrupted_revisit_mask)
             valid_revisit_eff_mask = torch.zeros_like(valid_revisit_eff_mask)
             revisit_gate_raw = torch.zeros_like(revisit_gate_raw)
@@ -1948,7 +1971,6 @@ class MemoryDiTMixin:
             "revisit_gate_eff": revisit_gate.detach() if torch.is_tensor(revisit_gate) else torch.tensor(float(revisit_gate)),
             "no_valid_revisit_mask": no_valid_revisit_mask,
             "valid_revisit_eff_mask": valid_revisit_eff_mask,
-            "valid_revisit_target_mask": valid_revisit_target_mask,
             "revisit_candidate_frame_count_per_target": revisit_candidate_count,
             "revisit_selected_frame_count_per_target": revisit_selected_count,
             "revisit_best_selected_fov_overlap_per_target": revisit_best_selected_fov_overlap,
@@ -1972,18 +1994,17 @@ class MemoryDiTMixin:
             "token_patch_size": token_patch_size,
             "tokens_per_frame": tokens_per_frame,
             "anchor_token_slots": int(anchor_tokens.shape[-2]),
-            "anchor_budget_tokens": anchor_num_tokens,
             "anchor_pool_h": anchor_pool_h,
             "anchor_pool_w": anchor_pool_w,
             "dynamic_token_slots": int(dynamic_tokens.shape[-2]),
-            "dynamic_budget_tokens": int(dynamic_tokens.shape[-2]),
             "dynamic_min_gap_to_target": dynamic_min_gap_to_target,
             "dynamic_max_gap_to_target": dynamic_max_gap_to_target,
-            "dynamic_exclude_latest_local_frames": exclude_latest_local_frames,
             "revisit_token_slots": int(revisit_tokens.shape[-2]),
-            "revisit_max_tokens": revisit_max_tokens,
-            "revisit_local_context_exclusion_frames": local_context_exclusion_frames,
-            "revisit_high_quality_fov_threshold": high_quality_fov_threshold,
             "revisit_pool_h": revisit_pool_h,
             "revisit_pool_w": revisit_pool_w,
             "revisit_max_frames": revisit_max_frames,

         "revisit_pose_preselect_selected_count",
         "revisit_exact_fov_candidate_count",
         "valid_revisit_frame_count",
         "no_valid_revisit_count",
         "revisit_selected_frame_count",
         "revisit_frame_fov_overlap_mean",
             if self._cfg_has(memory_cfg, name)
         ]
         if ratio_fields:
+            raise ValueError(f"standalone DeMemWM derives stream slots from latent shape and compression settings, not ratio fields: {ratio_fields}")
         anchor_cfg = self._cfg_get(memory_cfg, "anchor", None)
         dynamic_cfg = self._cfg_get(memory_cfg, "dynamic", None)
             "revisit_deterministic_fov_plucker_retrieval": True,
             "revisit_local_context_exclusion_frames": self._local_context_exclusion_frames(),
             "revisit_fov_overlap_threshold": -1.0 if fov_overlap_threshold is None else fov_overlap_threshold,
             "revisit_plucker_weight": plucker_weight,
             "stage_policy_noise_bucket_logging": True,
         }
         if source_positions.numel() <= num_anchors or pose is None:
             return source_positions[:num_anchors]
         poses = pose.float()
+        pairwise = torch.cdist(poses, poses)
+        if not bool((pairwise > 0).any().item()):
+            return source_positions[:num_anchors]
+        available = torch.ones((int(source_positions.numel()),), device=poses.device, dtype=torch.bool)
+        if num_anchors == 1:
+            selected = [int(pairwise.mean(dim=1).argmax().item())]
+        else:
+            first, second = divmod(int(pairwise.argmax().item()), int(pairwise.shape[1]))
+            selected = [int(first), int(second)]
+        for idx in selected:
+            available[idx] = False
+        dists = pairwise[selected].min(dim=0).values
+        dists = dists.masked_fill(~available, float("-inf"))
+        for _ in range(num_anchors - len(selected)):
             farthest = int(dists.argmax().item())
+            if not bool(available[farthest].item()):
+                break
             selected.append(farthest)
+            available[farthest] = False
+            d_new = pairwise[farthest]
             dists = torch.minimum(dists, d_new)
+            dists = dists.masked_fill(~available, float("-inf"))
         return source_positions[torch.tensor(sorted(selected), device=source_positions.device)]
     def _build_streaming_cache_records(
             source_positions = torch.nonzero(non_generated, as_tuple=False).flatten()
             if source_positions.numel() > 0:
                 if anchor_diverse:
+                    anchor_source_positions = source_positions[source_positions < self._context_frame_count()]
+                    if anchor_source_positions.numel() > 0:
+                        anchor_pose = _pose_subset(anchor_source_positions, batch_idx)
+                        selected_anchor_positions = self._select_diverse_anchor_positions(
+                            anchor_source_positions, anchor_pose, len(anchor_indices)
+                        )
+                    else:
+                        selected_anchor_positions = source_positions[:0]
                 else:
                     selected_list = []
                     for anchor_idx in anchor_indices:
         }
         return anchor_banks, revisit_banks, tokens_per_frame, diagnostics
+    def _causal_cached_revisit_records(
+        self,
+        records: Iterable[MemoryRecord],
+        target_frame: int,
+    ) -> list[MemoryRecord]:
         # Filter cached revisit records down to those usable for one target frame.
         #
         # Returns a new list, in the iteration order of `records`, holding every
         # record whose `source_end` (coerced to int) is <= `target_frame`.
         # `records` is only iterated; the input collection is not modified.
         #
         # NOTE(review): `<=` admits a record whose `source_end` equals the target
         # frame. That is only causal if `source_end` is an exclusive end index --
         # confirm against the MemoryRecord definition; if it is inclusive this
         # comparison should presumably be `<`.
+        target_frame = int(target_frame)
+        return [record for record in records if int(record.source_end) <= target_frame]
     def _records_to_stream(
         self,
         records,
+        target_slots: int,
         hidden_size: int,
         device: torch.device,
         dtype: torch.dtype,
     ) -> tuple[torch.Tensor, torch.Tensor, int]:
+        target_slots = max(0, int(target_slots))
         record_list = list(records)
+        stacked_tokens, stacked_mask = stack_record_tokens(record_list, target_slots=target_slots)
         max_source_frame = max((int(record.max_source_frame) for record in record_list), default=-1)
+        if stacked_tokens is None or stacked_mask is None or target_slots == 0:
+            tokens = torch.zeros((target_slots, hidden_size), device=device, dtype=dtype)
+            mask = torch.zeros((target_slots,), device=device, dtype=torch.bool)
             return tokens, mask, max_source_frame
+        n = min(target_slots, stacked_tokens.shape[0])
         filled = stacked_tokens[:n].to(device=device, dtype=dtype)
         filled_mask = stacked_mask[:n].to(device=device, dtype=torch.bool)
+        if n < target_slots:
+            pad = filled.new_zeros(target_slots - n, hidden_size)
+            pad_mask = torch.zeros(target_slots - n, device=device, dtype=torch.bool)
             tokens = torch.cat([filled, pad], dim=0)
             mask = torch.cat([filled_mask, pad_mask], dim=0)
         else:
         revisit_pool_h, revisit_pool_w = self._resolve_spatial_pool_size(
             revisit_compress_cfg, revisit_src_h, revisit_src_w, 5, 8
         )
+        revisit_target_slots = revisit_max_frames * revisit_pool_h * revisit_pool_w
         recent_frames = int(self._cfg_get(dynamic_cfg, "recent_frames", 8))
+        dynamic_recent_exclusion_frames = int(self._cfg_get(dynamic_cfg, "exclude_latest_local_frames", 4))
+        revisit_context_window_exclusion_frames = self._local_context_exclusion_frames()
         fov_overlap_threshold = self._cfg_get(revisit_cfg, "fov_overlap_threshold", 0.30)
         high_quality_fov_threshold = float(self._cfg_get(revisit_cfg, "high_quality_fov_threshold", 0.70))
         plucker_weight = float(self._cfg_get(revisit_cfg, "plucker_weight", 0.10))
                 dtype=stream_dtype,
                 max_recent_frames=recent_frames,
                 target_frame_indices=target_frame_indices,
+                exclude_latest_local_frames=dynamic_recent_exclusion_frames,
             )
             if raw_latents is not None:
                 dynamic_latents = raw_latents
                 revisit_pool_h,
                 revisit_pool_w,
                 revisit_max_frames,
+                revisit_context_window_exclusion_frames,
                 fov_overlap_threshold,
                 plucker_weight,
                 revisit_retrieval_kwargs,
         T_tgt = target_frame_indices.shape[0]
         anchor_slots = max(0, anchor_num_tokens)
+        revisit_slots = max(0, revisit_target_slots)
         anchor_source_type = None if allow_generated_anchor else MemorySourceType.PREFIX_GT
         anchor_include_generated = allow_generated_anchor
         anchor_token_rows = []
                         source_type=anchor_source_type,
                         include_generated=anchor_include_generated,
                         max_records=len(anchor_indices),
                     )
                 )
                 anchor_bank.assert_causal(target_frame, records)
                 "dynamic_min_gap_to_target_per_target": torch.full((B, T_tgt), -1, dtype=torch.long, device=stream_device),
                 "dynamic_max_gap_to_target_per_target": torch.full((B, T_tgt), -1, dtype=torch.long, device=stream_device),
                 "dynamic_overlap_with_c_short_count_per_target": torch.zeros((B, T_tgt), dtype=torch.long, device=stream_device),
+                "dynamic_exclude_latest_local_frames": dynamic_recent_exclusion_frames,
             }
         else:
             # Pre-select dynamic source frame positions using only frame index metadata
             for _b in range(B):
                 for _j in range(T_tgt):
                     _target = int(target_frame_indices[_j, _b].item())
+                    _valid = (_dfi[:, _b] < _target - dynamic_recent_exclusion_frames).nonzero(as_tuple=False).flatten()
                     _needed.extend(_valid[-_max_src:].tolist())
             if _needed:
                 _needed_idx = torch.tensor(sorted(set(_needed)), device=stream_device, dtype=torch.long)
                 _dynamic_pose_small,
                 target_frame_indices,
                 _dynamic_gen_small,
+                exclude_latest_local_frames=dynamic_recent_exclusion_frames,
             )
         dynamic_min_gap_tensor = torch.as_tensor(
         revisit_best_selected_fov_overlap = torch.zeros((B, T_tgt), device=stream_device, dtype=torch.float32)
         revisit_best_selected_plucker_overlap = torch.zeros((B, T_tgt), device=stream_device, dtype=torch.float32)
         revisit_selected_gap_frames = torch.full((B, T_tgt), -1.0, device=stream_device, dtype=torch.float32)
         eval_corrupted_revisit_mask = torch.zeros((B, T_tgt), device=stream_device, dtype=torch.bool)
         revisit_causal_max = torch.full((B, T_tgt), -1, device=stream_device, dtype=torch.long)
         eval_corruption_enabled = bool(eval_ablation_enabled and eval_ablation_branch in EVAL_CORRUPTION_BRANCHES)
             for target_idx in range(T_tgt):
                 target_frame = int(target_frame_indices[target_idx, batch_idx].item())
                 if use_cache_revisit_records:
+                    candidate_records = self._causal_cached_revisit_records(
+                        revisit_record_batches[batch_idx],
+                        target_frame,
+                    )
                 else:
                     candidate_records = revisit_bank.query(
                         MemoryBankQuery(
                     target_pose=_target_tensor_or_none(target_pose_source, batch_idx, target_idx),
                     target_summary=None,
                     topk=revisit_max_frames,
+                    exclude_local_context_frames=revisit_context_window_exclusion_frames,
                     fov_overlap_threshold=fov_overlap_threshold,
                     plucker_weight=plucker_weight,
                     target_video_id=_target_video_id_or_none(batch_idx, target_idx),
                 revisit_best_selected_fov_overlap[batch_idx, target_idx] = float(result.diagnostics.get("best_selected_fov_overlap", 0.0))
                 revisit_best_selected_plucker_overlap[batch_idx, target_idx] = float(result.diagnostics.get("best_selected_plucker_overlap", 0.0))
                 revisit_selected_gap_frames[batch_idx, target_idx] = float(result.diagnostics.get("best_selected_gap_frames", -1))
                 revisit_bank.assert_causal(target_frame, selected_records)
                 if selected_records:
                     valid_revisit_mask[batch_idx, target_idx] = True
                         target_frame=target_frame,
                     )
                     eval_corrupted_revisit_mask[batch_idx, target_idx] = bool(was_corrupted)
                 batch_token_rows.append(stream_tokens)
                 batch_mask_rows.append(stream_mask)
+                batch_max_rows.append(torch.as_tensor(max_source_frame, device=stream_device, dtype=torch.long))
             revisit_token_rows.append(torch.stack(batch_token_rows, dim=0))
             revisit_mask_rows.append(torch.stack(batch_mask_rows, dim=0))
             revisit_max_rows.append(torch.stack(batch_max_rows, dim=0))
         revisit_max = torch.stack(revisit_max_rows, dim=0)
         if anchor_tokens.shape[-2] != anchor_num_tokens:
+            raise AssertionError(f"anchor slot count mismatch: got {anchor_tokens.shape[-2]}, expected {anchor_num_tokens}")
         if dynamic_latents is not None and dynamic_latents.shape[0] > 0:
             _expected_dyn = self.dememwm_dynamic_compressor.tokens_per_target(
                 int(dynamic_latents.shape[-2]), int(dynamic_latents.shape[-1])
             )
             if dynamic_tokens.shape[-2] != _expected_dyn:
+                raise AssertionError(f"dynamic slot count mismatch: got {dynamic_tokens.shape[-2]}, expected {_expected_dyn}")
+        if revisit_tokens.shape[-2] != revisit_target_slots:
+            raise AssertionError(f"revisit slot count mismatch: got {revisit_tokens.shape[-2]}, expected {revisit_target_slots}")
         anchor_gate = gates.anchor_gate if anchor_effective_enabled else 0.0
         dynamic_gate = gates.dynamic_gate if dynamic_effective_enabled else 0.0
         gate_module = getattr(self, "dememwm_revisit_gate", None)
             revisit_best_selected_fov_overlap = torch.zeros_like(revisit_best_selected_fov_overlap)
             revisit_best_selected_plucker_overlap = torch.zeros_like(revisit_best_selected_plucker_overlap)
             revisit_selected_gap_frames = torch.full_like(revisit_selected_gap_frames, -1.0)
             eval_corrupted_revisit_mask = torch.zeros_like(eval_corrupted_revisit_mask)
             valid_revisit_eff_mask = torch.zeros_like(valid_revisit_eff_mask)
             revisit_gate_raw = torch.zeros_like(revisit_gate_raw)
             "revisit_gate_eff": revisit_gate.detach() if torch.is_tensor(revisit_gate) else torch.tensor(float(revisit_gate)),
             "no_valid_revisit_mask": no_valid_revisit_mask,
             "valid_revisit_eff_mask": valid_revisit_eff_mask,
             "revisit_candidate_frame_count_per_target": revisit_candidate_count,
             "revisit_selected_frame_count_per_target": revisit_selected_count,
             "revisit_best_selected_fov_overlap_per_target": revisit_best_selected_fov_overlap,
             "token_patch_size": token_patch_size,
             "tokens_per_frame": tokens_per_frame,
             "anchor_token_slots": int(anchor_tokens.shape[-2]),
+            "anchor_target_slots": anchor_num_tokens,
             "anchor_pool_h": anchor_pool_h,
             "anchor_pool_w": anchor_pool_w,
             "dynamic_token_slots": int(dynamic_tokens.shape[-2]),
+            "dynamic_target_slots": int(dynamic_tokens.shape[-2]),
             "dynamic_min_gap_to_target": dynamic_min_gap_to_target,
             "dynamic_max_gap_to_target": dynamic_max_gap_to_target,
+            "dynamic_exclude_latest_local_frames": dynamic_recent_exclusion_frames,
             "revisit_token_slots": int(revisit_tokens.shape[-2]),
+            "revisit_target_slots": revisit_target_slots,
+            "revisit_local_context_exclusion_frames": revisit_context_window_exclusion_frames,
             "revisit_pool_h": revisit_pool_h,
             "revisit_pool_w": revisit_pool_w,
             "revisit_max_frames": revisit_max_frames,

algorithms/worldmem/dememwm/cache.py CHANGED Viewed

@@ -39,7 +39,6 @@ class StreamingCache:
         no_evict: bool = True,
         clear_between_videos: bool = True,
         max_records: Optional[int] = None,
-        max_slots: Optional[int] = None,
         on_capacity_exceeded: str = "warn",
     ) -> None:
         self.enabled = bool(enabled)
@@ -51,7 +50,6 @@ class StreamingCache:
         self.no_evict = bool(no_evict)
         self.clear_between_videos = bool(clear_between_videos)
         self.max_records = max_records
-        self.max_slots = max_slots
         self.on_capacity_exceeded = str(on_capacity_exceeded or "warn")
         if self.eviction_policy != "none" or not self.no_evict:
             raise ValueError("DeMemWMStreamingCache only supports eviction_policy='none' with no_evict=true")
@@ -92,7 +90,6 @@ class StreamingCache:
             no_evict=bool(get("no_evict", True)),
             clear_between_videos=bool(get("clear_between_videos", True)),
             max_records=get("max_records", None),
-            max_slots=get("max_slots", None),
             on_capacity_exceeded=str(get("on_capacity_exceeded", "warn")),
         )
@@ -213,14 +210,12 @@ class StreamingCache:
         exceeded = False
         if self.max_records is not None and self.record_count > int(self.max_records):
             exceeded = True
-        if self.max_slots is not None and self.slot_count > int(self.max_slots):
-            exceeded = True
         if not exceeded:
             return
         self.capacity_exceeded_count += 1
         msg = (
             "DeMemWMStreamingCache capacity exceeded "
-            f"records={self.record_count}/{self.max_records}, slots={self.slot_count}/{self.max_slots}; "
             "no eviction performed because no_evict=true"
         )
         if self.on_capacity_exceeded == "error":

         no_evict: bool = True,
         clear_between_videos: bool = True,
         max_records: Optional[int] = None,
         on_capacity_exceeded: str = "warn",
     ) -> None:
         self.enabled = bool(enabled)
         self.no_evict = bool(no_evict)
         self.clear_between_videos = bool(clear_between_videos)
         self.max_records = max_records
         self.on_capacity_exceeded = str(on_capacity_exceeded or "warn")
         if self.eviction_policy != "none" or not self.no_evict:
             raise ValueError("DeMemWMStreamingCache only supports eviction_policy='none' with no_evict=true")
             no_evict=bool(get("no_evict", True)),
             clear_between_videos=bool(get("clear_between_videos", True)),
             max_records=get("max_records", None),
             on_capacity_exceeded=str(get("on_capacity_exceeded", "warn")),
         )
         exceeded = False
         if self.max_records is not None and self.record_count > int(self.max_records):
             exceeded = True
         if not exceeded:
             return
         self.capacity_exceeded_count += 1
         msg = (
             "DeMemWMStreamingCache capacity exceeded "
+            f"records={self.record_count}/{self.max_records}; "
             "no eviction performed because no_evict=true"
         )
         if self.on_capacity_exceeded == "error":

algorithms/worldmem/dememwm/diagnostics.py CHANGED Viewed

@@ -64,7 +64,6 @@ def summarize_revisit_diagnostics(result_diagnostics: list[dict[str, Any]], vali
     exact_fov_candidate_count = sum(int(diag.get("revisit_exact_fov_candidate_count", 0)) for diag in result_diagnostics)
     valid_count = sum(int(diag.get("valid_revisit_frame_count", diag.get("valid_revisit_count", diag.get("valid_candidate_count", 0)))) for diag in result_diagnostics)
     valid_count_mean = float(valid_count / target_count) if target_count else 0.0
-    valid_target_count = sum(int(diag.get("valid_revisit_target_count", diag.get("high_quality_selected_revisit", 0))) for diag in result_diagnostics)
     selected_count = sum(int(diag.get("revisit_selected_frame_count", diag.get("revisit_selected_count", diag.get("selected_count", 0)))) for diag in result_diagnostics)
     no_valid_count = sum(int(diag.get("no_valid_revisit_count", 0)) for diag in result_diagnostics)
     abstained_count = sum(int(diag.get("revisit_abstained_count", int(bool(diag.get("abstained", False))))) for diag in result_diagnostics)
@@ -78,7 +77,6 @@ def summarize_revisit_diagnostics(result_diagnostics: list[dict[str, Any]], vali
         "revisit_exact_fov_candidate_count": float(exact_fov_candidate_count / target_count) if target_count else 0.0,
         "valid_revisit_frame_count": valid_count_mean,
         "valid_revisit_count": valid_count_mean,
-        "valid_revisit_target_count": int(valid_target_count),
         "no_valid_revisit_count": int(no_valid_count),
         "valid_revisit_mask_fraction": tensor_valid_fraction(valid_revisit_mask),
         "revisit_selected_frame_count": int(selected_count),

     exact_fov_candidate_count = sum(int(diag.get("revisit_exact_fov_candidate_count", 0)) for diag in result_diagnostics)
     valid_count = sum(int(diag.get("valid_revisit_frame_count", diag.get("valid_revisit_count", diag.get("valid_candidate_count", 0)))) for diag in result_diagnostics)
     valid_count_mean = float(valid_count / target_count) if target_count else 0.0
     selected_count = sum(int(diag.get("revisit_selected_frame_count", diag.get("revisit_selected_count", diag.get("selected_count", 0)))) for diag in result_diagnostics)
     no_valid_count = sum(int(diag.get("no_valid_revisit_count", 0)) for diag in result_diagnostics)
     abstained_count = sum(int(diag.get("revisit_abstained_count", int(bool(diag.get("abstained", False))))) for diag in result_diagnostics)
         "revisit_exact_fov_candidate_count": float(exact_fov_candidate_count / target_count) if target_count else 0.0,
         "valid_revisit_frame_count": valid_count_mean,
         "valid_revisit_count": valid_count_mean,
         "no_valid_revisit_count": int(no_valid_count),
         "valid_revisit_mask_fraction": tensor_valid_fraction(valid_revisit_mask),
         "revisit_selected_frame_count": int(selected_count),

algorithms/worldmem/dememwm/memory.py CHANGED Viewed

@@ -15,15 +15,13 @@ class MemoryBankQuery:
     source_type: Optional[MemorySourceType] = None
     include_generated: bool = True
     max_records: Optional[int] = None
-    max_slots: Optional[int] = None
 class CausalMemoryBank:
     """Small causal memory bank for DeMemWM records."""
-    def __init__(self, max_records: Optional[int] = None, max_slots: Optional[int] = None):
         self.max_records = max_records
-        self.max_slots = max_slots
         self._records: list[MemoryRecord] = []
     def __len__(self) -> int:
@@ -172,7 +170,6 @@ class CausalMemoryBank:
         if isinstance(query, int):
             query = MemoryBankQuery(target_frame=query, **kwargs)
         out: list[MemoryRecord] = []
-        used_slots = 0
         for record in self._records:
             if int(record.source_end) > int(query.target_frame):
                 continue
@@ -180,15 +177,9 @@ class CausalMemoryBank:
                 continue
             if not query.include_generated and record.is_generated:
                 continue
-            if query.max_slots is not None and used_slots >= query.max_slots:
-                break
             out.append(record)
-            if query.max_slots is not None:
-                used_slots += record.valid_slots
             if query.max_records is not None and len(out) >= query.max_records:
                 break
-            if query.max_slots is not None and used_slots >= query.max_slots:
-                break
         return out
     def assert_causal(self, target_frame: int, records: Iterable[MemoryRecord]) -> None:
@@ -197,12 +188,13 @@ class CausalMemoryBank:
             raise AssertionError(f"future/non-causal memory selected for target {target_frame}: {offenders}")
-def stack_record_tokens(records: list[MemoryRecord], max_slots: int | None = None):
     if not records:
         return None, None
     tokens = torch.cat([r.tokens for r in records], dim=0)
     mask = torch.cat([r.mask.bool() for r in records], dim=0)
-    if max_slots is not None:
-        tokens = tokens[:max_slots]
-        mask = mask[:max_slots]
     return tokens, mask

     source_type: Optional[MemorySourceType] = None
     include_generated: bool = True
     max_records: Optional[int] = None
 class CausalMemoryBank:
     """Small causal memory bank for DeMemWM records."""
+    def __init__(self, max_records: Optional[int] = None):
         self.max_records = max_records
         self._records: list[MemoryRecord] = []
     def __len__(self) -> int:
         if isinstance(query, int):
             query = MemoryBankQuery(target_frame=query, **kwargs)
         out: list[MemoryRecord] = []
         for record in self._records:
             if int(record.source_end) > int(query.target_frame):
                 continue
                 continue
             if not query.include_generated and record.is_generated:
                 continue
             out.append(record)
             if query.max_records is not None and len(out) >= query.max_records:
                 break
         return out
     def assert_causal(self, target_frame: int, records: Iterable[MemoryRecord]) -> None:
             raise AssertionError(f"future/non-causal memory selected for target {target_frame}: {offenders}")
def stack_record_tokens(records: list[MemoryRecord], target_slots: int | None = None):
    """Concatenate the ``tokens`` and ``mask`` tensors of ``records`` along dim 0.

    Args:
        records: Records to stack, in list order. Each record must expose
            ``tokens`` and ``mask`` tensors.
        target_slots: Optional cap on the number of returned slots. When given,
            slots whose mask is False are dropped first and at most
            ``target_slots`` of the remaining valid slots are kept, preserving
            order. When ``None`` every slot (valid or not) is returned as-is.

    Returns:
        ``(tokens, mask)``; ``(None, None)`` when ``records`` is empty.
    """
    if not records:
        return None, None
    tokens = torch.cat([r.tokens for r in records], dim=0)
    mask = torch.cat([r.mask.bool() for r in records], dim=0)
    if target_slots is not None:
        # Truncate the index vector *before* gathering so that only the slots
        # that are actually kept get copied out of ``tokens`` / ``mask``.
        # NOTE(review): assumes each record mask is 1-D over slots - confirm.
        keep_idx = mask.nonzero(as_tuple=False).flatten()[:target_slots]
        tokens = tokens.index_select(0, keep_idx)
        mask = mask.index_select(0, keep_idx)
    return tokens, mask

algorithms/worldmem/dememwm/retrieval.py CHANGED Viewed

@@ -427,7 +427,6 @@ def deterministic_revisit_retrieval(
         "revisit_candidate_count": len(causal_records),
         "valid_revisit_frame_count": len(valid_labels),
         "valid_revisit_count": len(valid_labels),
-        "valid_revisit_target_count": high_quality_selected,
         "no_valid_revisit_count": int(len(valid_labels) == 0),
         "valid_revisit_mask": int(len(valid_labels) > 0),
         "revisit_abstained_count": int(len(selected_records) == 0),

         "revisit_candidate_count": len(causal_records),
         "valid_revisit_frame_count": len(valid_labels),
         "valid_revisit_count": len(valid_labels),
         "no_valid_revisit_count": int(len(valid_labels) == 0),
         "valid_revisit_mask": int(len(valid_labels) > 0),
         "revisit_abstained_count": int(len(selected_records) == 0),

configurations/algorithm/dememwm_memory_dit.yaml CHANGED Viewed

@@ -93,7 +93,6 @@ dememwm:
     no_evict: true
     clear_between_videos: true
     max_records: null
-    max_slots: null
     on_capacity_exceeded: warn
   checkpoint:
     strict_dememwm_eval_load: true

     no_evict: true
     clear_between_videos: true
     max_records: null
     on_capacity_exceeded: warn
   checkpoint:
     strict_dememwm_eval_load: true

scripts/dememwm_full_eval.slurm CHANGED Viewed

@@ -150,7 +150,6 @@ EVAL_ARGS=(
   "++algorithm.dememwm.cache.no_evict=true"
   "++algorithm.dememwm.cache.clear_between_videos=true"
   "++algorithm.dememwm.cache.max_records=null"
-  "++algorithm.dememwm.cache.max_slots=null"
   "++algorithm.dememwm.cache.on_capacity_exceeded=warn"
   "experiment.validation.batch_size=${VAL_BATCH_SIZE}"
   "experiment.validation.limit_batch=${VAL_LIMIT}"

   "++algorithm.dememwm.cache.no_evict=true"
   "++algorithm.dememwm.cache.clear_between_videos=true"
   "++algorithm.dememwm.cache.max_records=null"
   "++algorithm.dememwm.cache.on_capacity_exceeded=warn"
   "experiment.validation.batch_size=${VAL_BATCH_SIZE}"
   "experiment.validation.limit_batch=${VAL_LIMIT}"

scripts/dememwm_full_train.slurm CHANGED Viewed

@@ -69,8 +69,7 @@ srun python -m main \
     ++algorithm.dememwm.dynamic.recent_frames=4 \
     ++algorithm.dememwm.revisit.enabled=true \
     ++algorithm.dememwm.revisit.deterministic_pose_retrieval=true \
-    ++algorithm.dememwm.revisit.fov_overlap_threshold=0.30 \
-    ++algorithm.dememwm.revisit.high_quality_fov_threshold=0.70 \
     ++algorithm.dememwm.revisit.pose_preselect_topk=64 \
     ++algorithm.dememwm.revisit.fov_yaw_samples=25 \
     ++algorithm.dememwm.revisit.fov_pitch_samples=20 \
@@ -87,7 +86,6 @@ srun python -m main \
     ++algorithm.dememwm.cache.no_evict=true \
     ++algorithm.dememwm.cache.clear_between_videos=true \
     ++algorithm.dememwm.cache.max_records=null \
-    ++algorithm.dememwm.cache.max_slots=null \
     ++algorithm.dememwm.cache.on_capacity_exceeded=warn \
     ++algorithm.dememwm.curriculum.enabled=true \
     ++algorithm.dememwm.curriculum.full_stage_start_step=20000 \

     ++algorithm.dememwm.dynamic.recent_frames=4 \
     ++algorithm.dememwm.revisit.enabled=true \
     ++algorithm.dememwm.revisit.deterministic_pose_retrieval=true \
+    ++algorithm.dememwm.revisit.fov_overlap_threshold=0.60 \
     ++algorithm.dememwm.revisit.pose_preselect_topk=64 \
     ++algorithm.dememwm.revisit.fov_yaw_samples=25 \
     ++algorithm.dememwm.revisit.fov_pitch_samples=20 \
     ++algorithm.dememwm.cache.no_evict=true \
     ++algorithm.dememwm.cache.clear_between_videos=true \
     ++algorithm.dememwm.cache.max_records=null \
     ++algorithm.dememwm.cache.on_capacity_exceeded=warn \
     ++algorithm.dememwm.curriculum.enabled=true \
     ++algorithm.dememwm.curriculum.full_stage_start_step=20000 \

tests/test_dememwm_config_static.py CHANGED Viewed

@@ -72,8 +72,6 @@ def test_full_scripts_use_consumed_contract_overrides():
     required = [
         "algorithm.dememwm.dynamic.exclude_latest_local_frames=4",
         "algorithm.dememwm.revisit.deterministic_pose_retrieval=true",
-        "algorithm.dememwm.revisit.fov_overlap_threshold=0.30",
-        "algorithm.dememwm.revisit.high_quality_fov_threshold=0.70",
         "algorithm.dememwm.revisit.pose_preselect_topk=64",
         "algorithm.dememwm.revisit.fov_yaw_samples=25",
         "algorithm.dememwm.revisit.fov_pitch_samples=20",
@@ -98,9 +96,18 @@ def test_full_scripts_use_consumed_contract_overrides():
         "algorithm.dememwm.revisit.generated_penalty",
         "algorithm.dememwm.rollout.",
     ]
-    for rel in ("scripts/dememwm_full_train.slurm", "scripts/dememwm_full_eval.slurm"):
         text = Path(rel).read_text()
-        for token in required:
             assert token in text, f"{token} missing from {rel}"
         for token in stale:
             assert token not in text, f"stale {token} override remains in {rel}"
@@ -145,7 +152,6 @@ def test_revisit_retrieval_is_deterministic_fov_plucker_contract():
         "valid_revisit_mask",
         "revisit_candidate_frame_count",
         "valid_candidate_label_count",
-        "valid_revisit_target_count",
         "valid_revisit_frame_count",
         "no_valid_revisit_count",
         "revisit_selected_frame_count",
@@ -180,7 +186,6 @@ def test_eval_ablation_and_noise_bucket_logging_contracts():
     schedules = Path("algorithms/worldmem/dememwm/schedules.py").read_text()
     diagnostics = Path("algorithms/worldmem/dememwm/diagnostics.py").read_text()
     algorithm = Path("algorithms/worldmem/dememwm/algorithm.py").read_text()
-    matrix = Path("scripts/dememwm_eval_ablation_matrix.sh").read_text()
     for branch in [
         "memory_off",
         "A_only",
@@ -197,7 +202,6 @@ def test_eval_ablation_and_noise_bucket_logging_contracts():
         "local_context_overlap_fake_revisit",
     ]:
         assert branch in schedules
-        assert branch in matrix
     for token in [
         "noise_bucket_from_denoising_fraction",
         "noise_bucket_from_noise_levels",

     required = [
         "algorithm.dememwm.dynamic.exclude_latest_local_frames=4",
         "algorithm.dememwm.revisit.deterministic_pose_retrieval=true",
         "algorithm.dememwm.revisit.pose_preselect_topk=64",
         "algorithm.dememwm.revisit.fov_yaw_samples=25",
         "algorithm.dememwm.revisit.fov_pitch_samples=20",
         "algorithm.dememwm.revisit.generated_penalty",
         "algorithm.dememwm.rollout.",
     ]
+    expected_by_script = {
+        "scripts/dememwm_full_train.slurm": [
+            "algorithm.dememwm.revisit.fov_overlap_threshold=0.60",
+        ],
+        "scripts/dememwm_full_eval.slurm": [
+            "algorithm.dememwm.revisit.fov_overlap_threshold=0.30",
+            "algorithm.dememwm.revisit.high_quality_fov_threshold=0.70",
+        ],
+    }
+    for rel, script_specific_required in expected_by_script.items():
         text = Path(rel).read_text()
+        for token in required + script_specific_required:
             assert token in text, f"{token} missing from {rel}"
         for token in stale:
             assert token not in text, f"stale {token} override remains in {rel}"
         "valid_revisit_mask",
         "revisit_candidate_frame_count",
         "valid_candidate_label_count",
         "valid_revisit_frame_count",
         "no_valid_revisit_count",
         "revisit_selected_frame_count",
     schedules = Path("algorithms/worldmem/dememwm/schedules.py").read_text()
     diagnostics = Path("algorithms/worldmem/dememwm/diagnostics.py").read_text()
     algorithm = Path("algorithms/worldmem/dememwm/algorithm.py").read_text()
     for branch in [
         "memory_off",
         "A_only",
         "local_context_overlap_fake_revisit",
     ]:
         assert branch in schedules
     for token in [
         "noise_bucket_from_denoising_fraction",
         "noise_bucket_from_noise_levels",

tests/test_dememwm_memory.py CHANGED Viewed

@@ -44,12 +44,46 @@ def test_all_false_masks_are_valid_abstention_outputs():
     assert mask.sum().item() == 0
-def test_budgets_cap_records_and_slots():
     bank = CausalMemoryBank(max_records=10)
     for f in range(6):
         bank.add_record(_record(f, slots=2))
-    records = bank.query(MemoryBankQuery(target_frame=10, max_records=2, max_slots=3))
     assert len(records) == 2
-    tokens, mask = stack_record_tokens(records, max_slots=3)
     assert tokens.shape[0] == 3
     assert mask.shape[0] == 3

     assert mask.sum().item() == 0
def test_query_caps_records_and_stack_uses_target_slots():
    """``query`` honours ``max_records``; stacking returns ``target_slots`` rows."""
    bank = CausalMemoryBank(max_records=10)
    for frame in range(6):
        bank.add_record(_record(frame, slots=2))

    # The record cap is applied by the query itself.
    capped_query = MemoryBankQuery(target_frame=10, max_records=2)
    records = bank.query(capped_query)
    assert len(records) == 2

    # The slot cap is applied when the selected records are stacked.
    stacked_tokens, stacked_mask = stack_record_tokens(records, target_slots=3)
    assert stacked_tokens.shape[0] == 3
    assert stacked_mask.shape[0] == 3
def test_target_slots_ignore_masked_slots_when_stacking_records():
    """A fully masked record must not consume any of the ``target_slots`` budget."""

    def make_record(chunk_id, frame, tokens, mask):
        # Single-frame REVISIT record covering [frame, frame + 1).
        return MemoryRecord(
            tokens=tokens,
            mask=mask,
            source_start=frame,
            source_end=frame + 1,
            frame_indices=torch.tensor([frame]),
            pose=None,
            source_type=MemorySourceType.REVISIT,
            is_generated=False,
            chunk_id=chunk_id,
        )

    bank = CausalMemoryBank()
    # Four slots, every one masked out.
    bank.add_record(make_record("invalid", 0, torch.ones(4, 4), torch.zeros(4, dtype=torch.bool)))
    # Two slots, both valid, filled with the value 2.
    bank.add_record(make_record("valid", 1, torch.ones(2, 4) * 2, torch.ones(2, dtype=torch.bool)))

    selected = bank.query(MemoryBankQuery(target_frame=3))
    stacked_tokens, stacked_mask = stack_record_tokens(selected, target_slots=2)

    assert stacked_mask.tolist() == [True, True]
    assert torch.equal(stacked_tokens, torch.ones(2, 4) * 2)

tests/test_dememwm_noise_bucket.py CHANGED Viewed

@@ -92,7 +92,6 @@ def test_noise_bucket_log_allowlist_keeps_target_counts_only():
         "noise_bucket_low_target_count",
         "revisit_candidate_frame_count",
         "valid_revisit_frame_count",
-        "valid_revisit_target_count",
         "revisit_selected_frame_count",
         "revisit_frame_fov_overlap_mean",
         "revisit_best_selected_frame_fov_overlap_mean",

         "noise_bucket_low_target_count",
         "revisit_candidate_frame_count",
         "valid_revisit_frame_count",
         "revisit_selected_frame_count",
         "revisit_frame_fov_overlap_mean",
         "revisit_best_selected_frame_fov_overlap_mean",

tests/test_dememwm_preselection.py CHANGED Viewed

@@ -56,6 +56,50 @@ def test_revisit_local_context_exclusion_uses_n_tokens_times_frame_stack():
     assert harness._local_context_exclusion_frames() == 8
 def test_diverse_anchor_selection_uses_context_frames_not_literal_limit():
     harness = Harness()
     harness.context_frames = 2
@@ -92,6 +136,31 @@ def test_diverse_anchor_selection_uses_context_frames_not_literal_limit():
     assert diag["preselected_anchor_projected_frame_count"] == 2
 def test_preselected_memory_banks_project_only_selected_frames():
     harness = Harness()
     latents = torch.randn(20, 1, 3, 2, 2)

     assert harness._local_context_exclusion_frames() == 8
def test_diverse_anchor_selection_does_not_repeat_tied_pose_indices():
    """With all-identical poses the four selected positions must be distinct."""
    candidate_count = 5
    tied_poses = torch.zeros((candidate_count, 5), dtype=torch.float32)
    picked = Harness()._select_diverse_anchor_positions(
        torch.arange(candidate_count),
        tied_poses,
        4,
    )
    assert picked.tolist() == [0, 1, 2, 3]
def test_diverse_anchor_selection_seeds_from_widest_pose_pair():
    """Selecting two anchors picks the positions holding poses -10 and +10."""
    pose_values = [[0.0], [-10.0], [10.0], [0.1]]
    poses = torch.tensor(pose_values, dtype=torch.float32)
    positions = torch.arange(len(pose_values))
    picked = Harness()._select_diverse_anchor_positions(positions, poses, 2)
    assert picked.tolist() == [1, 2]
def test_cached_revisit_prefilter_keeps_only_causal_records():
    """Of records starting at frames 0, 2 and 5, only 0 and 2 survive for target frame 3."""
    harness = Harness()

    def make_revisit(frame: int) -> MemoryRecord:
        # One-slot REVISIT record covering [frame, frame + 1).
        return MemoryRecord(
            tokens=torch.zeros((1, 8)),
            mask=torch.ones(1, dtype=torch.bool),
            source_start=frame,
            source_end=frame + 1,
            frame_indices=torch.tensor([frame]),
            pose=None,
            source_type=MemorySourceType.REVISIT,
            is_generated=False,
            chunk_id=f"revisit_{frame}",
        )

    cached = (make_revisit(0), make_revisit(2), make_revisit(5))
    kept = harness._causal_cached_revisit_records(cached, target_frame=3)
    kept_starts = [rec.source_start for rec in kept]
    assert kept_starts == [0, 2]
 def test_diverse_anchor_selection_uses_context_frames_not_literal_limit():
     harness = Harness()
     harness.context_frames = 2
     assert diag["preselected_anchor_projected_frame_count"] == 2
def test_streaming_diverse_anchor_selection_uses_context_frames():
    """With ``context_frames=2`` and four anchor indices, two anchor frames are kept."""
    n_frames = 8
    harness = Harness()
    harness.context_frames = 2

    build_kwargs = dict(
        source_latents=torch.randn(n_frames, 1, 3, 2, 2),
        source_frame_indices=torch.arange(n_frames)[:, None],
        source_is_generated=None,
        pose=torch.zeros((n_frames, 1, 5), dtype=torch.float32),
        action=None,
        allow_generated_anchor=False,
        anchor_indices=[0, 1, 2, 3],
        anchor_pool_h=1,
        anchor_pool_w=1,
        anchor_diverse=True,
        token_patch_size=2,
    )
    anchor_banks, _ = harness._build_streaming_cache_records(**build_kwargs)

    kept_frames = [int(rec.frame_indices.item()) for rec in anchor_banks[0].records]
    assert kept_frames == [0, 1]
    # The harness records one projection call, of length 2.
    assert harness.project_call_lengths == [2]
 def test_preselected_memory_banks_project_only_selected_frames():
     harness = Harness()
     latents = torch.randn(20, 1, 3, 2, 2)

tests/test_dememwm_retrieval.py CHANGED Viewed

@@ -96,7 +96,6 @@ def test_revisit_candidates_require_causal_c_short_gap():
     assert result.diagnostics["valid_revisit_frame_count"] == 1
     assert result.diagnostics["valid_revisit_count"] == 1
     assert result.diagnostics["valid_candidate_label_count"] == 1
-    assert result.diagnostics["valid_revisit_target_count"] == 1
     assert result.diagnostics["revisit_min_gap_to_target"] == 5
     assert result.diagnostics["revisit_vectorized_frame_scorer_used"] == 1
@@ -106,7 +105,6 @@ def test_revisit_abstains_when_no_valid_candidate():
     assert result.records == []
     assert result.diagnostics["abstained"] is True
     assert result.diagnostics["valid_revisit_mask"] == 0
-    assert result.diagnostics["valid_revisit_target_count"] == 0
     assert result.diagnostics["no_valid_revisit_count"] == 1
@@ -155,7 +153,6 @@ def test_fov_threshold_filters_candidates_without_action():
     assert result.diagnostics["selected_frame_record_ids"] == ["c0"]
     assert result.diagnostics["valid_revisit_frame_count"] == 1
     assert result.diagnostics["valid_revisit_count"] == 1
-    assert result.diagnostics["valid_revisit_target_count"] == 1
     assert result.diagnostics["best_selected_fov_overlap"] == 1.0
     assert result.diagnostics["revisit_best_selected_fov_overlap_max"] == 1.0
     assert result.diagnostics["best_selected_gap_frames"] == 10
@@ -206,7 +203,6 @@ def test_selected_frame_carries_frame_metadata_for_projection():
     assert result.records[0].metadata["dememwm_selected_frame_passes_high_quality"] is True
     assert result.diagnostics["best_selected_frame_index"] == 1
     assert result.diagnostics["best_selected_frame_fov_overlap"] == 1.0
-    assert result.diagnostics["valid_revisit_target_count"] == 1
 def test_high_quality_threshold_is_selected_target_diagnostic_only():
@@ -221,7 +217,6 @@ def test_high_quality_threshold_is_selected_target_diagnostic_only():
     )
     assert result.diagnostics["selected_frame_record_ids"] == ["c0"]
     assert result.diagnostics["valid_revisit_count"] == 1
-    assert result.diagnostics["valid_revisit_target_count"] == 0
     assert 0.30 <= result.diagnostics["best_selected_fov_overlap"] < 0.70

     assert result.diagnostics["valid_revisit_frame_count"] == 1
     assert result.diagnostics["valid_revisit_count"] == 1
     assert result.diagnostics["valid_candidate_label_count"] == 1
     assert result.diagnostics["revisit_min_gap_to_target"] == 5
     assert result.diagnostics["revisit_vectorized_frame_scorer_used"] == 1
     assert result.records == []
     assert result.diagnostics["abstained"] is True
     assert result.diagnostics["valid_revisit_mask"] == 0
     assert result.diagnostics["no_valid_revisit_count"] == 1
     assert result.diagnostics["selected_frame_record_ids"] == ["c0"]
     assert result.diagnostics["valid_revisit_frame_count"] == 1
     assert result.diagnostics["valid_revisit_count"] == 1
     assert result.diagnostics["best_selected_fov_overlap"] == 1.0
     assert result.diagnostics["revisit_best_selected_fov_overlap_max"] == 1.0
     assert result.diagnostics["best_selected_gap_frames"] == 10
     assert result.records[0].metadata["dememwm_selected_frame_passes_high_quality"] is True
     assert result.diagnostics["best_selected_frame_index"] == 1
     assert result.diagnostics["best_selected_frame_fov_overlap"] == 1.0
 def test_high_quality_threshold_is_selected_target_diagnostic_only():
     )
     assert result.diagnostics["selected_frame_record_ids"] == ["c0"]
     assert result.diagnostics["valid_revisit_count"] == 1
     assert 0.30 <= result.diagnostics["best_selected_fov_overlap"] < 0.70

tests/test_dememwm_stream_grad.py CHANGED Viewed

@@ -23,7 +23,7 @@ def test_records_to_stream_preserves_grad_to_record_tokens():
     tokens, mask, max_source = MemoryDiTMixin._records_to_stream(
         object(),
         [record],
-        max_tokens=4,
         hidden_size=4,
         device=torch.device("cpu"),
         dtype=torch.float32,

     tokens, mask, max_source = MemoryDiTMixin._records_to_stream(
         object(),
         [record],
+        target_slots=4,
         hidden_size=4,
         device=torch.device("cpu"),
         dtype=torch.float32,

train_dememwm_full_berzelius.sh CHANGED Viewed

@@ -23,10 +23,10 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 export WANDB_DISABLED=true
 export HYDRA_FULL_ERROR=1
-OUTPUT_DIR=/proj/cvl/users/x_fahkh2/WorldMem_Repro/checkpoints/dememwm_full_berzelius_8a100_bs8_global64_350k
 srun python -m main \
-    +name=train_dememwm_full_berzelius_8a100_bs8_global64_350k \
     +output_dir="${OUTPUT_DIR}/" \
     auto_resume=true \
     experiment.tasks=[training] \
@@ -40,7 +40,7 @@ srun python -m main \
     dataset.save_dir=/proj/cvl/users/x_fahkh2/WorldMem_Repro/datasets/minecraft \
     dataset.precomputed_feature_dir=/proj/cvl/users/x_fahkh2/WorldMem_Repro/datasets/minecraft/vae_features \
     dataset.n_frames=1000 \
-    +dataset.n_frames_valid=1100 \
     +dataset.customized_validation=true \
     +dataset.memory_condition_length=0 \
     +dataset.wo_updown=false \
@@ -68,8 +68,7 @@ srun python -m main \
     ++algorithm.dememwm.dynamic.recent_frames=4 \
     ++algorithm.dememwm.revisit.enabled=true \
     ++algorithm.dememwm.revisit.deterministic_pose_retrieval=true \
-    ++algorithm.dememwm.revisit.fov_overlap_threshold=0.30 \
-    ++algorithm.dememwm.revisit.high_quality_fov_threshold=0.70 \
     ++algorithm.dememwm.revisit.pose_preselect_topk=64 \
     ++algorithm.dememwm.revisit.fov_yaw_samples=25 \
     ++algorithm.dememwm.revisit.fov_pitch_samples=20 \
@@ -86,7 +85,6 @@ srun python -m main \
     ++algorithm.dememwm.cache.no_evict=true \
     ++algorithm.dememwm.cache.clear_between_videos=true \
     ++algorithm.dememwm.cache.max_records=null \
-    ++algorithm.dememwm.cache.max_slots=null \
     ++algorithm.dememwm.cache.on_capacity_exceeded=warn \
     ++algorithm.dememwm.curriculum.enabled=true \
     ++algorithm.dememwm.curriculum.full_stage_start_step=20000 \
@@ -95,7 +93,7 @@ srun python -m main \
     ++algorithm.dememwm.curriculum.lr.dememwm_modules=4.0e-5 \
     ++algorithm.dememwm.curriculum.lr.memory_adapters=4.0e-5 \
     ++algorithm.dememwm.curriculum.lr.full_dit=1.0e-5 \
-    experiment.training.batch_size=8 \
     experiment.training.optim.accumulate_grad_batches=1 \
     experiment.validation.batch_size=1 \
     experiment.validation.limit_batch=8 \

 export WANDB_DISABLED=true
 export HYDRA_FULL_ERROR=1
+OUTPUT_DIR=/proj/cvl/users/x_fahkh2/WorldMem_Repro/checkpoints/dememwm_full_berzelius_8a100_bs16_global128_350k
 srun python -m main \
+    +name=train_dememwm_full_berzelius_8a100_bs16_global128_350k \
     +output_dir="${OUTPUT_DIR}/" \
     auto_resume=true \
     experiment.tasks=[training] \
     dataset.save_dir=/proj/cvl/users/x_fahkh2/WorldMem_Repro/datasets/minecraft \
     dataset.precomputed_feature_dir=/proj/cvl/users/x_fahkh2/WorldMem_Repro/datasets/minecraft/vae_features \
     dataset.n_frames=1000 \
+    +dataset.n_frames_valid=700 \
     +dataset.customized_validation=true \
     +dataset.memory_condition_length=0 \
     +dataset.wo_updown=false \
     ++algorithm.dememwm.dynamic.recent_frames=4 \
     ++algorithm.dememwm.revisit.enabled=true \
     ++algorithm.dememwm.revisit.deterministic_pose_retrieval=true \
+    ++algorithm.dememwm.revisit.fov_overlap_threshold=0.60 \
     ++algorithm.dememwm.revisit.pose_preselect_topk=64 \
     ++algorithm.dememwm.revisit.fov_yaw_samples=25 \
     ++algorithm.dememwm.revisit.fov_pitch_samples=20 \
     ++algorithm.dememwm.cache.no_evict=true \
     ++algorithm.dememwm.cache.clear_between_videos=true \
     ++algorithm.dememwm.cache.max_records=null \
     ++algorithm.dememwm.cache.on_capacity_exceeded=warn \
     ++algorithm.dememwm.curriculum.enabled=true \
     ++algorithm.dememwm.curriculum.full_stage_start_step=20000 \
     ++algorithm.dememwm.curriculum.lr.dememwm_modules=4.0e-5 \
     ++algorithm.dememwm.curriculum.lr.memory_adapters=4.0e-5 \
     ++algorithm.dememwm.curriculum.lr.full_dit=1.0e-5 \
+    experiment.training.batch_size=16 \
     experiment.training.optim.accumulate_grad_batches=1 \
     experiment.validation.batch_size=1 \
     experiment.validation.limit_batch=8 \