Jayce-Ping committed on
Commit
8a42298
·
verified ·
1 Parent(s): 2e9398f

Add files using upload-large-folder tool

Browse files
maze/data_process.py CHANGED
@@ -15,6 +15,9 @@ Usage:
15
  # Evaluate result videos
16
  python maze_video_gen.py eval result_videos/ --text-dir maze/texts
17
 
 
 
 
18
  # Verify a pre-extracted JSON
19
  python maze_video_gen.py verify results.json --text-dir maze/texts
20
  """
@@ -369,6 +372,7 @@ def eval_videos(
369
  output_json: Optional[str] = None,
370
  gt_json: Optional[str] = None,
371
  strict: bool = True,
 
372
  ):
373
  """
374
  Evaluate a directory of result videos against ground-truth mazes.
@@ -381,13 +385,14 @@ def eval_videos(
381
 
382
  Matching convention:
383
  Video ``<stem>.mp4`` → Text ``<stem>.txt`` in *text_dir*.
384
- Common stems: ``size8_000``, ``size16_042``, etc.
385
 
386
  Args:
387
  video_dir: Directory containing result .mp4 files.
388
  text_dir: Directory containing ground-truth maze .txt files.
389
- output_json: Path to save extracted paths as JSON (default: video_dir/0_result.json).
390
  gt_json: Optional ground-truth answer JSON for accuracy by path length.
 
 
391
  """
392
  proc = MazeProcessor()
393
  vid_root = Path(video_dir)
@@ -408,6 +413,8 @@ def eval_videos(
408
 
409
  print(f"Found {len(videos)} result videos in {vid_root}")
410
  print(f"Text dir: {txt_root}")
 
 
411
 
412
  # --- Phase 1: Extract paths from last frames ---
413
  extracted: Dict[str, str] = {}
@@ -415,8 +422,8 @@ def eval_videos(
415
  missing_frame = 0
416
 
417
  for vpath in tqdm(videos, desc="Extracting paths"):
418
- stem = vpath.stem # e.g. "size8_000"
419
- stem = stem.replace('_gen', '') # Remove `_gen` suffix
420
  txt_path = txt_root / f"{stem}.txt"
421
 
422
  if not txt_path.exists():
@@ -438,8 +445,11 @@ def eval_videos(
438
  grid_raw=maze["grid_raw"],
439
  size=maze["size"],
440
  start=maze["start"],
 
 
 
441
  )
442
- extracted[f"{stem}.png"] = udrl # keyed by image name for consistency
443
 
444
  # Save extracted paths
445
  with open(output_json, "w", encoding="utf-8") as f:
@@ -473,6 +483,7 @@ def eval_videos(
473
  print(f"Evaluated : {total_valid}")
474
  print(f"Correctly Solved : {correct}")
475
  print(f"Accuracy : {acc:.2f}%")
 
476
  print(f"{'-' * 50}")
477
 
478
  # Breakdown by maze size
@@ -526,14 +537,13 @@ def _compare_with_gt(
526
  print(f" Warning: could not load ground-truth JSON: {gt_json_path}")
527
  return
528
 
529
- bins: Dict[str, Dict[str, int]] = {} # "10-19" -> {total, correct}
530
  for name, pred_udrl in extracted.items():
531
  if name not in gt:
532
  continue
533
  gt_udrl = gt[name]
534
  gt_len = len(gt_udrl)
535
 
536
- # Bin by path length (decades)
537
  lo = (gt_len // 10) * 10
538
  hi = lo + 9
539
  label = f"{lo:3d}-{hi:3d}"
@@ -563,6 +573,7 @@ def verify_results(json_file: str, text_dir: str, strict: bool = True):
563
  Args:
564
  json_file: Path to JSON with {name: udrl_string} predictions.
565
  text_dir: Directory containing maze .txt files.
 
566
  """
567
  proc = MazeProcessor()
568
  json_path = Path(json_file)
@@ -635,6 +646,8 @@ def parse_args():
635
  help="Optional ground-truth path.json for length-binned accuracy")
636
  ev.add_argument("--strict", action="store_true",
637
  help="Strict verification (exact UDRL match) vs leniency on no-op moves")
 
 
638
 
639
  # --- verify ---
640
  ver = sub.add_parser("verify", help="Verify a pre-extracted JSON of UDRL paths")
@@ -660,6 +673,7 @@ if __name__ == "__main__":
660
  output_json=args.output_json,
661
  gt_json=args.gt_json,
662
  strict=args.strict,
 
663
  )
664
 
665
  elif args.command == "verify":
 
15
  # Evaluate result videos
16
  python maze_video_gen.py eval result_videos/ --text-dir maze/texts
17
 
18
+ # Evaluate with backtracking path extraction
19
+ python maze_video_gen.py eval result_videos/ --text-dir maze/texts --recursive
20
+
21
  # Verify a pre-extracted JSON
22
  python maze_video_gen.py verify results.json --text-dir maze/texts
23
  """
 
372
  output_json: Optional[str] = None,
373
  gt_json: Optional[str] = None,
374
  strict: bool = True,
375
+ recursive: bool = False,
376
  ):
377
  """
378
  Evaluate a directory of result videos against ground-truth mazes.
 
385
 
386
  Matching convention:
387
  Video ``<stem>.mp4`` → Text ``<stem>.txt`` in *text_dir*.
 
388
 
389
  Args:
390
  video_dir: Directory containing result .mp4 files.
391
  text_dir: Directory containing ground-truth maze .txt files.
392
+ output_json: Path to save extracted paths as JSON.
393
  gt_json: Optional ground-truth answer JSON for accuracy by path length.
394
+ strict: Strict verification mode.
395
+ recursive: Use backtracking DFS for red-path extraction instead of greedy.
396
  """
397
  proc = MazeProcessor()
398
  vid_root = Path(video_dir)
 
413
 
414
  print(f"Found {len(videos)} result videos in {vid_root}")
415
  print(f"Text dir: {txt_root}")
416
+ print(f"Mode: {'recursive (backtracking)' if recursive else 'greedy'}, "
417
+ f"strict={'yes' if strict else 'no'}")
418
 
419
  # --- Phase 1: Extract paths from last frames ---
420
  extracted: Dict[str, str] = {}
 
422
  missing_frame = 0
423
 
424
  for vpath in tqdm(videos, desc="Extracting paths"):
425
+ stem = vpath.stem
426
+ stem = stem.replace('_gen', '')
427
  txt_path = txt_root / f"{stem}.txt"
428
 
429
  if not txt_path.exists():
 
445
  grid_raw=maze["grid_raw"],
446
  size=maze["size"],
447
  start=maze["start"],
448
+ recursive=recursive,
449
+ end=maze["end"],
450
+ strict=strict,
451
  )
452
+ extracted[f"{stem}.png"] = udrl
453
 
454
  # Save extracted paths
455
  with open(output_json, "w", encoding="utf-8") as f:
 
483
  print(f"Evaluated : {total_valid}")
484
  print(f"Correctly Solved : {correct}")
485
  print(f"Accuracy : {acc:.2f}%")
486
+ print(f"Extraction Mode : {'recursive' if recursive else 'greedy'}")
487
  print(f"{'-' * 50}")
488
 
489
  # Breakdown by maze size
 
537
  print(f" Warning: could not load ground-truth JSON: {gt_json_path}")
538
  return
539
 
540
+ bins: Dict[str, Dict[str, int]] = {}
541
  for name, pred_udrl in extracted.items():
542
  if name not in gt:
543
  continue
544
  gt_udrl = gt[name]
545
  gt_len = len(gt_udrl)
546
 
 
547
  lo = (gt_len // 10) * 10
548
  hi = lo + 9
549
  label = f"{lo:3d}-{hi:3d}"
 
573
  Args:
574
  json_file: Path to JSON with {name: udrl_string} predictions.
575
  text_dir: Directory containing maze .txt files.
576
+ strict: Strict verification mode.
577
  """
578
  proc = MazeProcessor()
579
  json_path = Path(json_file)
 
646
  help="Optional ground-truth path.json for length-binned accuracy")
647
  ev.add_argument("--strict", action="store_true",
648
  help="Strict verification (exact UDRL match) vs leniency on no-op moves")
649
+ ev.add_argument("--recursive", action="store_true",
650
+ help="Use backtracking DFS for path extraction instead of greedy")
651
 
652
  # --- verify ---
653
  ver = sub.add_parser("verify", help="Verify a pre-extracted JSON of UDRL paths")
 
673
  output_json=args.output_json,
674
  gt_json=args.gt_json,
675
  strict=args.strict,
676
+ recursive=args.recursive,
677
  )
678
 
679
  elif args.command == "verify":
maze/maze/checkpoints/Wan2.1-I2V-14B-720P_full_0223/epoch-0.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fb7fa5e8c1b4287ac9ed4a0e462c95a6f06ecfe078554c584169aed72b5da79
3
+ size 32789894056
maze/maze/checkpoints/Wan2.1-I2V-14B-720P_full_0223/epoch-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e22ad03b66272cd712be2d0767782d9c5f81df8a55f2153406ab9791fee4ffea
3
+ size 32789894056
maze/maze_processor.py CHANGED
@@ -162,7 +162,7 @@ class MazeProcessor:
162
 
163
  # ==================== Verification ====================
164
 
165
- def verify_path(self, grid: Grid, start: Tuple, end: Tuple, udrl: str, strict : bool = True) -> bool:
166
  """Verify that *udrl* is a wall-respecting walk from *start* to *end*."""
167
  n = len(grid)
168
  r, c = start
@@ -419,29 +419,22 @@ class MazeProcessor:
419
 
420
  # ==================== Red-Path Extraction ====================
421
 
422
- def extract_path_from_pixels(
423
  self,
424
  pixels: np.ndarray,
425
- grid_raw: List[List[int]],
426
  size: int,
427
- start: Tuple[int, int],
428
  pixel_threshold: float = 0.01,
429
- ) -> str:
430
  """
431
- Detect red path in an RGB pixel array and return UDRL.
432
-
433
- Uses **floating-point** cell boundaries matching the renderer to avoid
434
- misalignment on sizes that don't evenly divide the image (e.g. 24, 48).
435
 
436
  Args:
437
  pixels: (H, W, 3) uint8 RGB array.
438
- grid_raw: Bitmask grid as list[list[int]].
439
  size: Maze dimension n.
440
- start: Start coordinate (r, c).
441
  pixel_threshold: Min red-pixel fraction to mark a cell.
442
 
443
  Returns:
444
- UDRL action string.
445
  """
446
  img = Image.fromarray(pixels)
447
  w, h = img.size
@@ -450,9 +443,6 @@ class MazeProcessor:
450
  r_ch, g_ch, b_ch = px[:, :, 0], px[:, :, 1], px[:, :, 2]
451
  red_mask = (r_ch > 100) & (r_ch > g_ch * 1.2) & (r_ch > b_ch * 1.2)
452
 
453
- # Use FLOAT cell size to match render() coordinate system exactly.
454
- # Integer division (h // size) drifts by up to (size-1) * fractional
455
- # pixels, causing the last cells to be completely misaligned.
456
  cell_h_f = h / size
457
  cell_w_f = w / size
458
 
@@ -463,17 +453,24 @@ class MazeProcessor:
463
  for c in range(size):
464
  x0 = int(round(c * cell_w_f))
465
  x1 = int(round((c + 1) * cell_w_f))
466
- # Small inward margin to avoid wall / neighbour bleed-over
467
- ch = y1 - y0
468
- cw = x1 - x0
469
- margin_y = max(1, int(ch * 0.15))
470
- margin_x = max(1, int(cw * 0.15))
471
  sub = red_mask[y0 + margin_y : y1 - margin_y,
472
  x0 + margin_x : x1 - margin_x]
473
  if sub.size > 0 and np.mean(sub) > pixel_threshold:
474
  path_grid[r, c] = True
 
475
 
476
- # Greedy walk from start, respecting maze walls
 
 
 
 
 
 
 
477
  directions = [
478
  ("R", MOVES["R"]),
479
  ("D", MOVES["D"]),
@@ -501,13 +498,123 @@ class MazeProcessor:
501
  break
502
  return "".join(actions)
503
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
  def extract_path_from_image(
505
- self, img_path: str, grid_raw: List[List[int]], size: int, start: Tuple
 
506
  ) -> str:
507
  """Extract UDRL from an image file (convenience wrapper)."""
508
  try:
509
  pixels = np.array(Image.open(img_path).convert("RGB"))
510
- return self.extract_path_from_pixels(pixels, grid_raw, size, start)
 
 
 
511
  except Exception:
512
  return ""
513
 
 
162
 
163
  # ==================== Verification ====================
164
 
165
+ def verify_path(self, grid: Grid, start: Tuple, end: Tuple, udrl: str, strict: bool = True) -> bool:
166
  """Verify that *udrl* is a wall-respecting walk from *start* to *end*."""
167
  n = len(grid)
168
  r, c = start
 
419
 
420
  # ==================== Red-Path Extraction ====================
421
 
422
+ def _detect_red_grid(
423
  self,
424
  pixels: np.ndarray,
 
425
  size: int,
 
426
  pixel_threshold: float = 0.01,
427
+ ) -> np.ndarray:
428
  """
429
+ Detect which cells contain red pixels and return a boolean grid.
 
 
 
430
 
431
  Args:
432
  pixels: (H, W, 3) uint8 RGB array.
 
433
  size: Maze dimension n.
 
434
  pixel_threshold: Min red-pixel fraction to mark a cell.
435
 
436
  Returns:
437
+ (size, size) bool ndarray — True where red path detected.
438
  """
439
  img = Image.fromarray(pixels)
440
  w, h = img.size
 
443
  r_ch, g_ch, b_ch = px[:, :, 0], px[:, :, 1], px[:, :, 2]
444
  red_mask = (r_ch > 100) & (r_ch > g_ch * 1.2) & (r_ch > b_ch * 1.2)
445
 
 
 
 
446
  cell_h_f = h / size
447
  cell_w_f = w / size
448
 
 
453
  for c in range(size):
454
  x0 = int(round(c * cell_w_f))
455
  x1 = int(round((c + 1) * cell_w_f))
456
+ ch_ = y1 - y0
457
+ cw_ = x1 - x0
458
+ margin_y = max(1, int(ch_ * 0.15))
459
+ margin_x = max(1, int(cw_ * 0.15))
 
460
  sub = red_mask[y0 + margin_y : y1 - margin_y,
461
  x0 + margin_x : x1 - margin_x]
462
  if sub.size > 0 and np.mean(sub) > pixel_threshold:
463
  path_grid[r, c] = True
464
+ return path_grid
465
 
466
+ def _greedy_walk(
467
+ self,
468
+ path_grid: np.ndarray,
469
+ grid_raw: List[List[int]],
470
+ size: int,
471
+ start: Tuple[int, int],
472
+ ) -> str:
473
+ """Greedy walk from start following red cells (original strategy)."""
474
  directions = [
475
  ("R", MOVES["R"]),
476
  ("D", MOVES["D"]),
 
498
  break
499
  return "".join(actions)
500
 
501
+ def _backtrack_walk(
502
+ self,
503
+ path_grid: np.ndarray,
504
+ grid_raw: List[List[int]],
505
+ size: int,
506
+ start: Tuple[int, int],
507
+ end: Optional[Tuple[int, int]] = None,
508
+ strict: bool = True,
509
+ ) -> str:
510
+ """
511
+ Backtracking DFS walk from start following red cells.
512
+
513
+ When multiple neighbouring red cells are reachable, tries each branch
514
+ and backtracks on dead-ends — guaranteeing a complete path if one exists.
515
+
516
+ Args:
517
+ path_grid: (size, size) bool grid of detected red cells.
518
+ grid_raw: Bitmask wall grid.
519
+ size: Maze dimension n.
520
+ start: Start coordinate (r, c).
521
+ end: End coordinate; required for strict, optional otherwise.
522
+ strict: If True, path must visit ALL red cells and end at *end*.
523
+ If False, reaching *end* is sufficient.
524
+
525
+ Returns:
526
+ UDRL action string (empty string if no valid path found).
527
+ """
528
+ directions = [
529
+ ("R", MOVES["R"]),
530
+ ("D", MOVES["D"]),
531
+ ("L", MOVES["L"]),
532
+ ("U", MOVES["U"]),
533
+ ]
534
+ total_red = int(path_grid.sum())
535
+
536
+ def _dfs(r: int, c: int, visited: set, actions: List[str]) -> Optional[str]:
537
+ # Non-strict: reaching end is sufficient
538
+ if not strict and end and (r, c) == end:
539
+ return "".join(actions)
540
+ # Strict: must cover all red cells AND land on end
541
+ if strict and len(visited) == total_red:
542
+ if end is None or (r, c) == end:
543
+ return "".join(actions)
544
+ return None
545
+
546
+ wval = grid_raw[r][c]
547
+ for act, (dr, dc, wall_ch) in directions:
548
+ nr, nc = r + dr, c + dc
549
+ if not (0 <= nr < size and 0 <= nc < size):
550
+ continue
551
+ if (wval & WALL_MASKS[wall_ch]) != 0:
552
+ continue
553
+ if not path_grid[nr, nc] or (nr, nc) in visited:
554
+ continue
555
+ visited.add((nr, nc))
556
+ actions.append(act)
557
+ result = _dfs(nr, nc, visited, actions)
558
+ if result is not None:
559
+ return result
560
+ actions.pop()
561
+ visited.remove((nr, nc))
562
+ return None
563
+
564
+ result = _dfs(start[0], start[1], {start}, [])
565
+ return result if result is not None else ""
566
+
567
+ def extract_path_from_pixels(
568
+ self,
569
+ pixels: np.ndarray,
570
+ grid_raw: List[List[int]],
571
+ size: int,
572
+ start: Tuple[int, int],
573
+ pixel_threshold: float = 0.01,
574
+ recursive: bool = False,
575
+ end: Optional[Tuple[int, int]] = None,
576
+ strict: bool = True,
577
+ ) -> str:
578
+ """
579
+ Detect red path in an RGB pixel array and return UDRL.
580
+
581
+ Uses floating-point cell boundaries matching the renderer to avoid
582
+ misalignment on sizes that don't evenly divide the image.
583
+
584
+ Args:
585
+ pixels: (H, W, 3) uint8 RGB array.
586
+ grid_raw: Bitmask grid as list[list[int]].
587
+ size: Maze dimension n.
588
+ start: Start coordinate (r, c).
589
+ pixel_threshold: Min red-pixel fraction to mark a cell.
590
+ recursive: Use backtracking DFS instead of greedy walk.
591
+ end: End coordinate (required for recursive strict mode).
592
+ strict: For recursive mode — if True, path must visit ALL
593
+ red cells and end at *end*; if False, reaching
594
+ *end* is sufficient.
595
+
596
+ Returns:
597
+ UDRL action string.
598
+ """
599
+ path_grid = self._detect_red_grid(pixels, size, pixel_threshold)
600
+
601
+ if recursive:
602
+ return self._backtrack_walk(
603
+ path_grid, grid_raw, size, start, end=end, strict=strict
604
+ )
605
+ return self._greedy_walk(path_grid, grid_raw, size, start)
606
+
607
  def extract_path_from_image(
608
+ self, img_path: str, grid_raw: List[List[int]], size: int, start: Tuple,
609
+ recursive: bool = False, end: Optional[Tuple] = None, strict: bool = True,
610
  ) -> str:
611
  """Extract UDRL from an image file (convenience wrapper)."""
612
  try:
613
  pixels = np.array(Image.open(img_path).convert("RGB"))
614
+ return self.extract_path_from_pixels(
615
+ pixels, grid_raw, size, start,
616
+ recursive=recursive, end=end, strict=strict,
617
+ )
618
  except Exception:
619
  return ""
620