Add files using upload-large-folder tool

Browse files

Files changed (4) hide show

maze/data_process.py +21 -7
maze/maze/checkpoints/Wan2.1-I2V-14B-720P_full_0223/epoch-0.safetensors +3 -0
maze/maze/checkpoints/Wan2.1-I2V-14B-720P_full_0223/epoch-1.safetensors +3 -0
maze/maze_processor.py +130 -23

maze/data_process.py CHANGED Viewed

@@ -15,6 +15,9 @@ Usage:
     # Evaluate result videos
     python maze_video_gen.py eval result_videos/ --text-dir maze/texts
     # Verify a pre-extracted JSON
     python maze_video_gen.py verify results.json --text-dir maze/texts
 """
@@ -369,6 +372,7 @@ def eval_videos(
     output_json: Optional[str] = None,
     gt_json: Optional[str] = None,
     strict: bool = True,
 ):
     """
     Evaluate a directory of result videos against ground-truth mazes.
@@ -381,13 +385,14 @@ def eval_videos(
     Matching convention:
         Video ``<stem>.mp4``  →  Text ``<stem>.txt``  in *text_dir*.
-        Common stems: ``size8_000``, ``size16_042``, etc.
     Args:
         video_dir:   Directory containing result .mp4 files.
         text_dir:    Directory containing ground-truth maze .txt files.
-        output_json: Path to save extracted paths as JSON (default: video_dir/0_result.json).
         gt_json:     Optional ground-truth answer JSON for accuracy by path length.
     """
     proc = MazeProcessor()
     vid_root = Path(video_dir)
@@ -408,6 +413,8 @@ def eval_videos(
     print(f"Found {len(videos)} result videos in {vid_root}")
     print(f"Text dir: {txt_root}")
     # --- Phase 1: Extract paths from last frames ---
     extracted: Dict[str, str] = {}
@@ -415,8 +422,8 @@ def eval_videos(
     missing_frame = 0
     for vpath in tqdm(videos, desc="Extracting paths"):
-        stem = vpath.stem                        # e.g. "size8_000"
-        stem = stem.replace('_gen', '') # Remove `_gen` suffix
         txt_path = txt_root / f"{stem}.txt"
         if not txt_path.exists():
@@ -438,8 +445,11 @@ def eval_videos(
             grid_raw=maze["grid_raw"],
             size=maze["size"],
             start=maze["start"],
         )
-        extracted[f"{stem}.png"] = udrl   # keyed by image name for consistency
     # Save extracted paths
     with open(output_json, "w", encoding="utf-8") as f:
@@ -473,6 +483,7 @@ def eval_videos(
     print(f"Evaluated          : {total_valid}")
     print(f"Correctly Solved   : {correct}")
     print(f"Accuracy           : {acc:.2f}%")
     print(f"{'-' * 50}")
     # Breakdown by maze size
@@ -526,14 +537,13 @@ def _compare_with_gt(
         print(f"  Warning: could not load ground-truth JSON: {gt_json_path}")
         return
-    bins: Dict[str, Dict[str, int]] = {}  # "10-19" -> {total, correct}
     for name, pred_udrl in extracted.items():
         if name not in gt:
             continue
         gt_udrl = gt[name]
         gt_len = len(gt_udrl)
-        # Bin by path length (decades)
         lo = (gt_len // 10) * 10
         hi = lo + 9
         label = f"{lo:3d}-{hi:3d}"
@@ -563,6 +573,7 @@ def verify_results(json_file: str, text_dir: str, strict: bool = True):
     Args:
         json_file: Path to JSON with {name: udrl_string} predictions.
         text_dir:  Directory containing maze .txt files.
     """
     proc = MazeProcessor()
     json_path = Path(json_file)
@@ -635,6 +646,8 @@ def parse_args():
                     help="Optional ground-truth path.json for length-binned accuracy")
     ev.add_argument("--strict", action="store_true",
                     help="Strict verification (exact UDRL match) vs leniency on no-op moves")
     # --- verify ---
     ver = sub.add_parser("verify", help="Verify a pre-extracted JSON of UDRL paths")
@@ -660,6 +673,7 @@ if __name__ == "__main__":
             output_json=args.output_json,
             gt_json=args.gt_json,
             strict=args.strict,
         )
     elif args.command == "verify":

     # Evaluate result videos
     python maze_video_gen.py eval result_videos/ --text-dir maze/texts
+    # Evaluate with backtracking path extraction
+    python maze_video_gen.py eval result_videos/ --text-dir maze/texts --recursive
     # Verify a pre-extracted JSON
     python maze_video_gen.py verify results.json --text-dir maze/texts
 """
     output_json: Optional[str] = None,
     gt_json: Optional[str] = None,
     strict: bool = True,
+    recursive: bool = False,
 ):
     """
     Evaluate a directory of result videos against ground-truth mazes.
     Matching convention:
         Video ``<stem>.mp4``  →  Text ``<stem>.txt``  in *text_dir*.
     Args:
         video_dir:   Directory containing result .mp4 files.
         text_dir:    Directory containing ground-truth maze .txt files.
+        output_json: Path to save extracted paths as JSON.
         gt_json:     Optional ground-truth answer JSON for accuracy by path length.
+        strict:      Strict verification mode.
+        recursive:   Use backtracking DFS for red-path extraction instead of greedy.
     """
     proc = MazeProcessor()
     vid_root = Path(video_dir)
     print(f"Found {len(videos)} result videos in {vid_root}")
     print(f"Text dir: {txt_root}")
+    print(f"Mode: {'recursive (backtracking)' if recursive else 'greedy'}, "
+          f"strict={'yes' if strict else 'no'}")
     # --- Phase 1: Extract paths from last frames ---
     extracted: Dict[str, str] = {}
     missing_frame = 0
     for vpath in tqdm(videos, desc="Extracting paths"):
+        stem = vpath.stem
+        stem = stem.replace('_gen', '')
         txt_path = txt_root / f"{stem}.txt"
         if not txt_path.exists():
             grid_raw=maze["grid_raw"],
             size=maze["size"],
             start=maze["start"],
+            recursive=recursive,
+            end=maze["end"],
+            strict=strict,
         )
+        extracted[f"{stem}.png"] = udrl
     # Save extracted paths
     with open(output_json, "w", encoding="utf-8") as f:
     print(f"Evaluated          : {total_valid}")
     print(f"Correctly Solved   : {correct}")
     print(f"Accuracy           : {acc:.2f}%")
+    print(f"Extraction Mode    : {'recursive' if recursive else 'greedy'}")
     print(f"{'-' * 50}")
     # Breakdown by maze size
         print(f"  Warning: could not load ground-truth JSON: {gt_json_path}")
         return
+    bins: Dict[str, Dict[str, int]] = {}
     for name, pred_udrl in extracted.items():
         if name not in gt:
             continue
         gt_udrl = gt[name]
         gt_len = len(gt_udrl)
         lo = (gt_len // 10) * 10
         hi = lo + 9
         label = f"{lo:3d}-{hi:3d}"
     Args:
         json_file: Path to JSON with {name: udrl_string} predictions.
         text_dir:  Directory containing maze .txt files.
+        strict:    Strict verification mode.
     """
     proc = MazeProcessor()
     json_path = Path(json_file)
                     help="Optional ground-truth path.json for length-binned accuracy")
     ev.add_argument("--strict", action="store_true",
                     help="Strict verification (exact UDRL match) vs leniency on no-op moves")
+    ev.add_argument("--recursive", action="store_true",
+                    help="Use backtracking DFS for path extraction instead of greedy")
     # --- verify ---
     ver = sub.add_parser("verify", help="Verify a pre-extracted JSON of UDRL paths")
             output_json=args.output_json,
             gt_json=args.gt_json,
             strict=args.strict,
+            recursive=args.recursive,
         )
     elif args.command == "verify":

maze/maze/checkpoints/Wan2.1-I2V-14B-720P_full_0223/epoch-0.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0fb7fa5e8c1b4287ac9ed4a0e462c95a6f06ecfe078554c584169aed72b5da79
+size 32789894056

maze/maze/checkpoints/Wan2.1-I2V-14B-720P_full_0223/epoch-1.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e22ad03b66272cd712be2d0767782d9c5f81df8a55f2153406ab9791fee4ffea
+size 32789894056

maze/maze_processor.py CHANGED Viewed

@@ -162,7 +162,7 @@ class MazeProcessor:
     # ==================== Verification ====================
-    def verify_path(self, grid: Grid, start: Tuple, end: Tuple, udrl: str, strict : bool = True) -> bool:
         """Verify that *udrl* is a wall-respecting walk from *start* to *end*."""
         n = len(grid)
         r, c = start
@@ -419,29 +419,22 @@ class MazeProcessor:
     # ==================== Red-Path Extraction ====================
-    def extract_path_from_pixels(
         self,
         pixels: np.ndarray,
-        grid_raw: List[List[int]],
         size: int,
-        start: Tuple[int, int],
         pixel_threshold: float = 0.01,
-    ) -> str:
         """
-        Detect red path in an RGB pixel array and return UDRL.
-        Uses **floating-point** cell boundaries matching the renderer to avoid
-        misalignment on sizes that don't evenly divide the image (e.g. 24, 48).
         Args:
             pixels:          (H, W, 3) uint8 RGB array.
-            grid_raw:        Bitmask grid as list[list[int]].
             size:            Maze dimension n.
-            start:           Start coordinate (r, c).
             pixel_threshold: Min red-pixel fraction to mark a cell.
         Returns:
-            UDRL action string.
         """
         img = Image.fromarray(pixels)
         w, h = img.size
@@ -450,9 +443,6 @@ class MazeProcessor:
         r_ch, g_ch, b_ch = px[:, :, 0], px[:, :, 1], px[:, :, 2]
         red_mask = (r_ch > 100) & (r_ch > g_ch * 1.2) & (r_ch > b_ch * 1.2)
-        # Use FLOAT cell size to match render() coordinate system exactly.
-        # Integer division (h // size) drifts by up to (size-1) * fractional
-        # pixels, causing the last cells to be completely misaligned.
         cell_h_f = h / size
         cell_w_f = w / size
@@ -463,17 +453,24 @@ class MazeProcessor:
             for c in range(size):
                 x0 = int(round(c * cell_w_f))
                 x1 = int(round((c + 1) * cell_w_f))
-                # Small inward margin to avoid wall / neighbour bleed-over
-                ch = y1 - y0
-                cw = x1 - x0
-                margin_y = max(1, int(ch * 0.15))
-                margin_x = max(1, int(cw * 0.15))
                 sub = red_mask[y0 + margin_y : y1 - margin_y,
                                x0 + margin_x : x1 - margin_x]
                 if sub.size > 0 and np.mean(sub) > pixel_threshold:
                     path_grid[r, c] = True
-        # Greedy walk from start, respecting maze walls
         directions = [
             ("R", MOVES["R"]),
             ("D", MOVES["D"]),
@@ -501,13 +498,123 @@ class MazeProcessor:
                 break
         return "".join(actions)
     def extract_path_from_image(
-        self, img_path: str, grid_raw: List[List[int]], size: int, start: Tuple
     ) -> str:
         """Extract UDRL from an image file (convenience wrapper)."""
         try:
             pixels = np.array(Image.open(img_path).convert("RGB"))
-            return self.extract_path_from_pixels(pixels, grid_raw, size, start)
         except Exception:
             return ""

     # ==================== Verification ====================
+    def verify_path(self, grid: Grid, start: Tuple, end: Tuple, udrl: str, strict: bool = True) -> bool:
         """Verify that *udrl* is a wall-respecting walk from *start* to *end*."""
         n = len(grid)
         r, c = start
     # ==================== Red-Path Extraction ====================
+    def _detect_red_grid(
         self,
         pixels: np.ndarray,
         size: int,
         pixel_threshold: float = 0.01,
+    ) -> np.ndarray:
         """
+        Detect which cells contain red pixels and return a boolean grid.
         Args:
             pixels:          (H, W, 3) uint8 RGB array.
             size:            Maze dimension n.
             pixel_threshold: Min red-pixel fraction to mark a cell.
         Returns:
+            (size, size) bool ndarray — True where red path detected.
         """
         img = Image.fromarray(pixels)
         w, h = img.size
         r_ch, g_ch, b_ch = px[:, :, 0], px[:, :, 1], px[:, :, 2]
         red_mask = (r_ch > 100) & (r_ch > g_ch * 1.2) & (r_ch > b_ch * 1.2)
         cell_h_f = h / size
         cell_w_f = w / size
             for c in range(size):
                 x0 = int(round(c * cell_w_f))
                 x1 = int(round((c + 1) * cell_w_f))
+                ch_ = y1 - y0
+                cw_ = x1 - x0
+                margin_y = max(1, int(ch_ * 0.15))
+                margin_x = max(1, int(cw_ * 0.15))
                 sub = red_mask[y0 + margin_y : y1 - margin_y,
                                x0 + margin_x : x1 - margin_x]
                 if sub.size > 0 and np.mean(sub) > pixel_threshold:
                     path_grid[r, c] = True
+        return path_grid
+    def _greedy_walk(
+        self,
+        path_grid: np.ndarray,
+        grid_raw: List[List[int]],
+        size: int,
+        start: Tuple[int, int],
+    ) -> str:
+        """Greedy walk from start following red cells (original strategy)."""
         directions = [
             ("R", MOVES["R"]),
             ("D", MOVES["D"]),
                 break
         return "".join(actions)
+    def _backtrack_walk(
+        self,
+        path_grid: np.ndarray,
+        grid_raw: List[List[int]],
+        size: int,
+        start: Tuple[int, int],
+        end: Optional[Tuple[int, int]] = None,
+        strict: bool = True,
+    ) -> str:
+        """
+        Backtracking DFS walk from start following red cells.
+        When multiple neighbouring red cells are reachable, tries each branch
+        and backtracks on dead-ends — guaranteeing a complete path if one exists.
+        Args:
+            path_grid: (size, size) bool grid of detected red cells.
+            grid_raw:  Bitmask wall grid.
+            size:      Maze dimension n.
+            start:     Start coordinate (r, c).
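+            end:       End coordinate. Non-strict mode needs it (with no *end*
+                       the search never succeeds and "" is returned); strict
+                       mode accepts any final cell when it is None.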
+            strict:    If True, path must visit ALL red cells and end at *end*.
+                       If False, reaching *end* is sufficient.
+        Returns:
+            UDRL action string (empty string if no valid path found).
+        """
+        directions = [
+            ("R", MOVES["R"]),
+            ("D", MOVES["D"]),
+            ("L", MOVES["L"]),
+            ("U", MOVES["U"]),
+        ]
+        total_red = int(path_grid.sum())
+        def _dfs(r: int, c: int, visited: set, actions: List[str]) -> Optional[str]:
+            # Non-strict: reaching end is sufficient
+            if not strict and end and (r, c) == end:
+                return "".join(actions)
+            # Strict: must cover all red cells AND land on end
+            if strict and len(visited) == total_red:
+                if end is None or (r, c) == end:
+                    return "".join(actions)
+                return None
+            wval = grid_raw[r][c]
+            for act, (dr, dc, wall_ch) in directions:
+                nr, nc = r + dr, c + dc
+                if not (0 <= nr < size and 0 <= nc < size):
+                    continue
+                if (wval & WALL_MASKS[wall_ch]) != 0:
+                    continue
+                if not path_grid[nr, nc] or (nr, nc) in visited:
+                    continue
+                visited.add((nr, nc))
+                actions.append(act)
+                result = _dfs(nr, nc, visited, actions)
+                if result is not None:
+                    return result
+                actions.pop()
+                visited.remove((nr, nc))
+            return None
+        result = _dfs(start[0], start[1], {start}, [])
+        return result if result is not None else ""
+    def extract_path_from_pixels(
+        self,
+        pixels: np.ndarray,
+        grid_raw: List[List[int]],
+        size: int,
+        start: Tuple[int, int],
+        pixel_threshold: float = 0.01,
+        recursive: bool = False,
+        end: Optional[Tuple[int, int]] = None,
+        strict: bool = True,
+    ) -> str:
+        """
+        Detect red path in an RGB pixel array and return UDRL.
+        Uses floating-point cell boundaries matching the renderer to avoid
+        misalignment on sizes that don't evenly divide the image.
+        Args:
+            pixels:          (H, W, 3) uint8 RGB array.
+            grid_raw:        Bitmask grid as list[list[int]].
+            size:            Maze dimension n.
+            start:           Start coordinate (r, c).
+            pixel_threshold: Min red-pixel fraction to mark a cell.
+            recursive:       Use backtracking DFS instead of greedy walk.
+            end:             End coordinate, used only in recursive mode. Needed
+                             for non-strict search (without it "" is returned);
+                             in strict mode None accepts any final cell.
+            strict:          For recursive mode — if True, path must visit ALL
+                             red cells and end at *end*; if False, reaching
+                             *end* is sufficient.
+        Returns:
+            UDRL action string.
+        """
+        path_grid = self._detect_red_grid(pixels, size, pixel_threshold)
+        if recursive:
+            return self._backtrack_walk(
+                path_grid, grid_raw, size, start, end=end, strict=strict
+            )
+        return self._greedy_walk(path_grid, grid_raw, size, start)
     def extract_path_from_image(
+        self, img_path: str, grid_raw: List[List[int]], size: int, start: Tuple,
+        recursive: bool = False, end: Optional[Tuple] = None, strict: bool = True,
     ) -> str:
         """Extract UDRL from an image file (convenience wrapper)."""
         try:
             pixels = np.array(Image.open(img_path).convert("RGB"))
+            return self.extract_path_from_pixels(
+                pixels, grid_raw, size, start,
+                recursive=recursive, end=end, strict=strict,
+            )
         except Exception:
             return ""