Jayce-Ping commited on
Commit
2c25848
·
verified ·
1 Parent(s): 7cdb0ca

Add files using upload-large-folder tool

Browse files
frozenlake/data_process.py ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FrozenLake Video Dataset Generator — generate, eval, verify.
3
+
4
+ Uses plain BFS solver (not networkx) for fast generation at all grid sizes.
5
+
6
+ Usage:
7
+ python frozenlake_video_gen.py generate --output-dir frozenlake \
8
+ --sizes 8 16 32 --num-per-size 100 500 1000 --p 0.8
9
+ python frozenlake_video_gen.py eval result_videos/ --table-dir frozenlake/tables
10
+ python frozenlake_video_gen.py verify results.json --table-dir frozenlake/tables
11
+ """
12
+ import json
13
+ import csv
14
+ import hashlib
15
+ import random
16
+ import re
17
+ import argparse
18
+ from dataclasses import dataclass, asdict
19
+ from pathlib import Path
20
+ from typing import Dict, List, Optional
21
+
22
+ import cv2
23
+ import numpy as np
24
+ from tqdm import tqdm
25
+
26
+ from frozenlake_processor import FrozenLakeProcessor
27
+
28
+
29
+ # ==================== Checkpoint ====================
30
+
31
@dataclass
class GenerationState:
    """Resumable snapshot of dataset-generation progress.

    Persisted inside ``metadata.json`` so an interrupted run can pick up
    where it left off.
    """

    params_hash: str                 # digest of the generation parameters
    size_progress: Dict[int, int]    # grid size -> puzzles generated so far
    seen_fingerprints: List[str]     # layouts already emitted (dedup)
    all_samples: List[Dict]          # per-puzzle metadata records
    completed: bool = False          # True once final outputs were written

    def to_dict(self) -> Dict:
        """Serialise to a plain, JSON-friendly dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: Dict) -> "GenerationState":
        """Rebuild a state object from :meth:`to_dict` output."""
        return cls(**d)
45
+
46
+
47
+ def _params_hash(params: Dict) -> str:
48
+ key = {k: v for k, v in params.items() if k != "output_dir"}
49
+ return hashlib.md5(json.dumps(key, sort_keys=True).encode()).hexdigest()[:12]
50
+
51
+
52
def load_checkpoint(output_dir: Path, params: Dict) -> Optional[GenerationState]:
    """Load a saved generation state, if one exists and matches *params*.

    Returns ``None`` when there is no checkpoint or the stored parameter
    hash differs from the current run's (in which case generation must
    restart from scratch).
    """
    meta_path = output_dir / "metadata.json"
    if not meta_path.exists():
        return None

    with open(meta_path) as fh:
        payload = json.load(fh)
    state = GenerationState.from_dict(payload["state"])

    expected = _params_hash(params)
    if state.params_hash != expected:
        print(f"⚠️ Params changed ({state.params_hash} → {expected}), starting fresh")
        return None

    if state.completed:
        print("✓ Generation already completed")
        return state

    print(f"✓ Resuming: {sum(state.size_progress.values())} puzzles done")
    return state
68
+
69
+
70
def save_checkpoint(output_dir: Path, state: GenerationState, params: Dict):
    """Atomically persist *state* and *params* to ``metadata.json``.

    Writes to a ``.tmp`` sibling first and renames it into place so a
    crash mid-write never leaves a truncated checkpoint.
    """
    meta_path = output_dir / "metadata.json"
    tmp_path = meta_path.with_suffix(".tmp")
    payload = {"params": params, "state": state.to_dict()}
    with open(tmp_path, "w") as fh:
        json.dump(payload, fh, indent=2)
    tmp_path.rename(meta_path)
76
+
77
+
78
+ # ==================== Video I/O ====================
79
+
80
def save_video_cv2(frames: list, path: str, fps: int = 10):
    """Encode a sequence of RGB frames into an mp4 file via OpenCV.

    Frame dimensions are taken from the first frame; all frames are
    assumed to share them (required by ``cv2.VideoWriter``).
    """
    height, width = np.array(frames[0]).shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(str(path), fourcc, fps, (width, height))
    for frame in frames:
        # OpenCV expects BGR channel order.
        writer.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
    writer.release()
87
+
88
+
89
def extract_last_frame(video_path: str) -> Optional[np.ndarray]:
    """Return the final frame of a video as an RGB array, or None on failure."""
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        return None

    # Seek directly to the last frame when the container reports a count;
    # otherwise just read from the current position.
    frame_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if frame_total > 0:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_total - 1)

    ok, frame = cap.read()
    cap.release()
    if not ok or frame is None:
        return None
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
101
+
102
+
103
+ # ==================== Helpers ====================
104
+
105
+ def _normalise_list(val, sizes, name="parameter"):
106
+ if isinstance(val, int):
107
+ return [val] * len(sizes)
108
+ if len(val) != len(sizes):
109
+ raise ValueError(f"{name} length ({len(val)}) != sizes ({len(sizes)})")
110
+ return list(val)
111
+
112
+
113
+ # ==================== Generate ====================
114
+
115
def generate_dataset(
    output_dir: str = "frozenlake",
    sizes: List[int] = [8, 16, 32],
    num_per_size: list = [100, 500, 1000],
    p: float = 0.8,
    min_path_ratio: float = 0.3,
    img_size: int = 512,
    prompt: str = "Draw a continuous red line connecting the Start point to the Goal point, avoiding all holes.",
    train_ratio: float = 0.9,
    n_start: int = 2,
    m_end: int = 3,
    frames: Optional[int] = None,
    fps: int = 10,
    seed: int = 42,
    use_gym: bool = True,
    checkpoint_interval: int = 50,
):
    """
    Generate FrozenLake video dataset with checkpoint/resume.

    Layout::

        output_dir/
            images/  videos/  tables/
            train.jsonl  test.jsonl  train.csv  test.csv
            path.json  metadata.json

    Args:
        sizes: grid edge lengths to generate.
        num_per_size: puzzle count per size (int broadcasts to all sizes).
        p: probability that a non-start/goal cell is frozen (safe).
        min_path_ratio: minimum shortest-path length as a fraction of size².
        n_start / m_end: hold frames before/after the animated path.
        frames: content-frame count (None → one per path step).
        checkpoint_interval: puzzles between metadata.json checkpoints.

    NOTE(review): ``sizes`` and ``num_per_size`` use mutable default
    arguments; they are not mutated here, but callers should still pass
    their own lists.
    """
    # Params dict doubles as the checkpoint identity (hashed, minus output_dir).
    params = {
        "sizes": sizes, "num_per_size": num_per_size,
        "p": p, "min_path_ratio": min_path_ratio, "img_size": img_size,
        "prompt": prompt, "train_ratio": train_ratio,
        "n_start": n_start, "m_end": m_end, "frames": frames,
        "fps": fps, "seed": seed, "use_gym": use_gym,
    }

    out = Path(output_dir)
    img_dir, vid_dir, tbl_dir = out / "images", out / "videos", out / "tables"
    for d in (img_dir, vid_dir, tbl_dir):
        d.mkdir(parents=True, exist_ok=True)

    state = load_checkpoint(out, params)
    if state and state.completed:
        return

    num_list = _normalise_list(
        num_per_size[0] if len(num_per_size) == 1 else num_per_size,
        sizes, "num_per_size",
    )
    # Zero-padding width for file names, based on the largest target count.
    num_w = len(str(max(num_list)))
    proc = FrozenLakeProcessor(img_size=img_size)

    if state is None:
        random.seed(seed)
        state = GenerationState(
            params_hash=_params_hash(params),
            size_progress={sz: 0 for sz in sizes},
            seen_fingerprints=[], all_samples=[],
        )
        print(f"Fresh generation: sizes={sizes}, counts={num_list}, p={p}")
        print(f"  frames={'auto' if frames is None else frames}, "
              f"n_start={n_start}, m_end={m_end}, fps={fps}")
    else:
        # Crude RNG fast-forward so a resumed run continues with a stream
        # roughly advanced past the already-generated puzzles.
        # NOTE(review): the *10 factor is heuristic, not an exact replay.
        random.seed(seed)
        for _ in range(sum(state.size_progress.values()) * 10):
            random.random()

    seen = set(state.seen_fingerprints)
    all_samples = list(state.all_samples)
    # JSON round-trips dict keys as strings; restore the int grid sizes.
    progress = {int(k): v for k, v in state.size_progress.items()}
    since_ckpt = 0
    total_target = sum(num_list)

    with tqdm(total=total_target, initial=sum(progress.values()),
              desc="Total", unit="puzzle") as pbar:
        for grid_size, target in zip(sizes, num_list):
            generated = progress.get(grid_size, 0)
            if generated >= target:
                continue

            min_len = max(1, int(grid_size * grid_size * min_path_ratio))

            with tqdm(total=target, initial=generated,
                      desc=f"Size {grid_size:3d}", unit="puzzle", leave=False) as pbar_sz:
                # Attempt cap (20× remaining) guards against pathological
                # parameter combinations that rarely yield unique puzzles.
                for _ in range((target - generated) * 20):
                    if generated >= target:
                        break
                    try:
                        desc, path = proc.generate(grid_size, p=p, min_path_len=min_len)
                    except RuntimeError:
                        continue

                    # Skip duplicate layouts across the whole run.
                    fp = proc.fingerprint(desc)
                    if fp in seen:
                        continue
                    seen.add(fp)

                    base = f"size{grid_size}_{generated:0{num_w}d}"
                    img_name, vid_name, tbl_name = f"{base}.png", f"{base}.mp4", f"{base}.txt"

                    proc.render(desc, use_gym=use_gym).save(str(img_dir / img_name))

                    vid_frames = proc.generate_video_frames(
                        desc, path, n_start=n_start, m_end=m_end,
                        frames=frames, use_gym=use_gym,
                    )
                    save_video_cv2(vid_frames, str(vid_dir / vid_name), fps=fps)
                    proc.save_table(str(tbl_dir / tbl_name), desc)

                    udrl = proc.path_to_udrl(path)
                    all_samples.append({
                        "prompt": prompt, "image": img_name, "video": vid_name,
                        "table": tbl_name, "grid_size": grid_size,
                        "grid_desc": desc, "start": list(proc.find_start(desc)),
                        "path_udrl": udrl, "path_length": len(path),
                        "frame_count": len(vid_frames),
                    })

                    generated += 1
                    progress[grid_size] = generated
                    since_ckpt += 1
                    pbar_sz.update(1)
                    pbar.update(1)

                    # Periodic checkpoint so an interrupted run can resume.
                    if since_ckpt >= checkpoint_interval:
                        state.size_progress = progress
                        state.seen_fingerprints = list(seen)
                        state.all_samples = all_samples
                        save_checkpoint(out, state, params)
                        since_ckpt = 0

            tqdm.write(f"Size {grid_size}: {generated} puzzles")

    # --- Final outputs ---
    # Ground-truth map image -> UDRL string, sorted by image name.
    with open(out / "path.json", "w") as f:
        json.dump(
            dict(sorted((s["image"], s["path_udrl"]) for s in all_samples)),
            f, indent=4,
        )

    # Deterministic shuffle (separate seed) before the train/test split.
    random.seed(seed + 1)
    random.shuffle(all_samples)
    split = int(len(all_samples) * train_ratio)

    def _jsonl(samples, path):
        # One JSON object per line.
        with open(path, "w") as f:
            for s in samples:
                f.write(json.dumps(s) + "\n")

    _jsonl(all_samples[:split], out / "train.jsonl")
    _jsonl(all_samples[split:], out / "test.jsonl")

    # CSV variant with repo-relative media paths.
    for name, samps in [("train", all_samples[:split]), ("test", all_samples[split:])]:
        with open(out / f"{name}.csv", "w", newline="", encoding="utf-8") as f:
            w = csv.writer(f)
            w.writerow(["input_image", "video", "prompt"])
            for s in samps:
                w.writerow([f"images/{s['image']}", f"videos/{s['video']}", s["prompt"]])

    # Mark the checkpoint complete so a re-run exits early.
    state.size_progress = progress
    state.seen_fingerprints = list(seen)
    state.all_samples = all_samples
    state.completed = True
    save_checkpoint(out, state, params)

    lengths = [s["path_length"] for s in all_samples]
    fcounts = [s["frame_count"] for s in all_samples]
    print(f"\n✓ Dataset complete: {out}/")
    print(f"  Sizes: {sizes}, p={p}, Puzzles: {len(all_samples)}")
    print(f"  Train: {split}, Test: {len(all_samples) - split}")
    print(f"  Path lengths: avg={np.mean(lengths):.1f}, min={min(lengths)}, max={max(lengths)}")
    print(f"  Frame counts: avg={np.mean(fcounts):.1f}, min={min(fcounts)}, max={max(fcounts)}")
286
+
287
+
288
+ # ==================== Eval ====================
289
+
290
def eval_videos(
    video_dir: str,
    table_dir: str,
    output_json: Optional[str] = None,
    gt_json: Optional[str] = None,
    use_gym: bool = True,
):
    """Evaluate result videos: last frame → red path → verify.

    For each ``*.mp4`` in *video_dir*, loads the matching ground-truth
    table from *table_dir*, extracts the red path drawn in the video's
    last frame, and verifies it. Extracted paths are dumped to
    *output_json* (default ``<video_dir>/0_result.json``). If *gt_json*
    is given, also reports accuracy bucketed by ground-truth path length.
    """
    proc = FrozenLakeProcessor()
    vid_root, tbl_root = Path(video_dir), Path(table_dir)
    if output_json is None:
        output_json = str(vid_root / "0_result.json")

    # Natural sort: split stems on digit runs so size10 comes after size2.
    videos = sorted(
        vid_root.glob("*.mp4"),
        key=lambda p: [int(s) if s.isdigit() else s for s in re.split(r"(\d+)", p.stem)],
    )
    if not videos:
        print(f"No .mp4 in {vid_root}")
        return

    print(f"Found {len(videos)} videos, table_dir={tbl_root}")

    # Phase 1: extract a UDRL string from each video's final frame.
    extracted: Dict[str, str] = {}
    missing_tbl = missing_frame = 0

    for vp in tqdm(videos, desc="Extracting"):
        stem = vp.stem
        desc = proc.load_table(str(tbl_root / f"{stem}.txt"))
        if desc is None:
            missing_tbl += 1
            continue
        start = proc.find_start(desc)
        if start is None:
            # Table parsed but contains no start cell — count as missing.
            missing_tbl += 1
            continue
        lf = extract_last_frame(str(vp))
        if lf is None:
            missing_frame += 1
            continue
        extracted[f"{stem}.png"] = proc.extract_path_from_pixels(
            lf, len(desc), len(desc[0]), start, desc
        )

    with open(output_json, "w") as f:
        json.dump(extracted, f, indent=4)
    print(f"Saved: {output_json}")

    # Phase 2: verify each extracted path against its grid.
    correct = total_valid = 0
    correctly_solved: List[Dict] = []
    size_stats: Dict[int, Dict[str, int]] = {}

    verify_fn = proc.verify_path_gym if use_gym else proc.verify_path_sim

    for name, udrl in extracted.items():
        desc = proc.load_table(str(tbl_root / f"{name.replace('.png', '')}.txt"))
        if desc is None:
            continue
        total_valid += 1
        sz = len(desc)
        size_stats.setdefault(sz, {"total": 0, "correct": 0})
        size_stats[sz]["total"] += 1
        if verify_fn(desc, udrl):
            correct += 1
            size_stats[sz]["correct"] += 1
            correctly_solved.append({"name": name, "length": len(udrl)})

    acc = correct / total_valid * 100 if total_valid else 0
    print(f"\n{'='*50}\nEvaluation Summary\n{'='*50}")
    print(f"Videos: {len(videos)}, Missing tables: {missing_tbl}, "
          f"Failed frames: {missing_frame}")
    print(f"Evaluated: {total_valid}, Correct: {correct}, Accuracy: {acc:.2f}%")

    if size_stats:
        print("\nBy size:")
        for sz in sorted(size_stats):
            s = size_stats[sz]
            print(f" {sz:3d}: {s['correct']}/{s['total']} "
                  f"({s['correct']/s['total']*100:.1f}%)")

    # Highlight the longest correctly-solved paths (hardest successes).
    correctly_solved.sort(key=lambda x: x["length"], reverse=True)
    for i, item in enumerate(correctly_solved[:3]):
        print(f" Top {i+1}: {item['name']} (len={item['length']})")

    if gt_json:
        _gt_bins(extracted, gt_json, tbl_root, proc, verify_fn)
    print(f"{'='*50}")
378
+
379
+
380
def _gt_bins(extracted, gt_path, tbl_root, proc, verify_fn):
    """Print accuracy bucketed by ground-truth path length (bins of 10)."""
    try:
        with open(gt_path) as fh:
            gt = json.load(fh)
    except Exception:
        # Best-effort report: a missing/corrupt GT file just disables it.
        return

    bins: Dict[str, Dict[str, int]] = {}
    for name, pred in extracted.items():
        if name not in gt:
            continue
        lo = (len(gt[name]) // 10) * 10
        label = f"{lo:3d}-{lo+9:3d}"
        bucket = bins.setdefault(label, {"total": 0, "correct": 0})
        bucket["total"] += 1
        desc = proc.load_table(str(tbl_root / f"{name.replace('.png','')}.txt"))
        if desc and verify_fn(desc, pred):
            bucket["correct"] += 1

    if not bins:
        return
    print("\nBy GT path length:")
    for label in sorted(bins):
        b = bins[label]
        print(f" {label}: {b['correct']}/{b['total']} "
              f"({b['correct']/b['total']*100:.1f}%)")
403
+
404
+
405
+ # ==================== Verify ====================
406
+
407
def verify_results(json_file: str, table_dir: str, use_gym: bool = True):
    """Verify a ``{image_name: UDRL}`` JSON against ground-truth tables.

    Entries whose table file cannot be loaded are counted as skipped and
    excluded from the accuracy denominator.
    """
    proc = FrozenLakeProcessor()
    with open(json_file) as fh:
        solutions = json.load(fh)

    check = proc.verify_path_gym if use_gym else proc.verify_path_sim
    tbl_root = Path(table_dir)
    correct = skipped = valid = 0

    for name, udrl in solutions.items():
        desc = proc.load_table(str(tbl_root / f"{name.replace('.png','')}.txt"))
        if desc is None:
            skipped += 1
            continue
        valid += 1
        if check(desc, udrl):
            correct += 1

    acc = correct / valid * 100 if valid else 0
    print(f"\n{'='*40}\nVerification: {correct}/{valid} ({acc:.2f}%)")
    if skipped:
        print(f"Skipped: {skipped}")
    print(f"{'='*40}")
426
+
427
+
428
+ # ==================== CLI ====================
429
+
430
+ def parse_args():
431
+ p = argparse.ArgumentParser(description="FrozenLake video dataset")
432
+ sub = p.add_subparsers(dest="command")
433
+
434
+ gen = sub.add_parser("generate")
435
+ gen.add_argument("--output-dir", default="frozenlake")
436
+ gen.add_argument("--sizes", type=int, nargs="+", default=[8, 16, 32])
437
+ gen.add_argument("--num-per-size", type=int, nargs="+", default=[100, 500, 1000])
438
+ gen.add_argument("--p", type=float, default=0.8)
439
+ gen.add_argument("--min-path-ratio", type=float, default=0.1,
440
+ help="Min path length as fraction of size² (default 0.1; "
441
+ "FrozenLake paths are much shorter than maze paths)")
442
+ gen.add_argument("--img-size", type=int, default=1024)
443
+ gen.add_argument("--prompt", default="Draw a continuous red line connecting the Start point to the Goal point, avoiding all holes.")
444
+ gen.add_argument("--train-ratio", type=float, default=0.9)
445
+ gen.add_argument("--n-start", type=int, default=2)
446
+ gen.add_argument("--m-end", type=int, default=3)
447
+ gen.add_argument("--frames", type=int, default=None)
448
+ gen.add_argument("--fps", type=int, default=10)
449
+ gen.add_argument("--seed", type=int, default=42)
450
+ gen.add_argument("--no-gym", action="store_true")
451
+ gen.add_argument("--checkpoint-interval", type=int, default=50)
452
+
453
+ ev = sub.add_parser("eval")
454
+ ev.add_argument("video_dir")
455
+ ev.add_argument("--table-dir", required=True)
456
+ ev.add_argument("--output-json", default=None)
457
+ ev.add_argument("--gt-json", default=None)
458
+ ev.add_argument("--no-gym", action="store_true")
459
+
460
+ ver = sub.add_parser("verify")
461
+ ver.add_argument("json_file")
462
+ ver.add_argument("--table-dir", required=True)
463
+ ver.add_argument("--no-gym", action="store_true")
464
+
465
+ return p.parse_args()
466
+
467
+
468
+ if __name__ == "__main__":
469
+ args = parse_args()
470
+ if args.command == "generate":
471
+ kw = {k: v for k, v in vars(args).items() if k not in ("command", "no_gym")}
472
+ kw["use_gym"] = not args.no_gym
473
+ generate_dataset(**kw)
474
+ elif args.command == "eval":
475
+ eval_videos(args.video_dir, args.table_dir, args.output_json,
476
+ args.gt_json, not args.no_gym)
477
+ elif args.command == "verify":
478
+ verify_results(args.json_file, args.table_dir, not args.no_gym)
479
+ else:
480
+ print("Usage: python frozenlake_video_gen.py {generate|eval|verify} ...")
frozenlake/frozenlake_processor.py ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FrozenLakeProcessor - FrozenLake puzzle generation, solving, rendering, and evaluation.
3
+
4
+ Grid cells: S=Start, F=Frozen(safe), H=Hole(death), G=Goal
5
+ Table chars: @=Start, _=Frozen, #=Hole, *=Goal
6
+
7
+ Performance notes vs original DiffThinker code:
8
+ - Solving uses plain BFS (O(n²)) instead of networkx graph construction
9
+ which had massive overhead from add_node/add_edge Python calls.
10
+ - Gym renderer is cached per puzzle to avoid repeated pygame init.
11
+ """
12
+ import os
13
+ import random
14
+ import warnings
15
+ from collections import deque
16
+ from typing import List, Tuple, Optional
17
+
18
+ import numpy as np
19
+ from PIL import Image, ImageDraw
20
+
21
+ try:
22
+ os.environ.setdefault("SDL_AUDIODRIVER", "dummy")
23
+ warnings.filterwarnings("ignore", category=UserWarning, module="pygame")
24
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
25
+ import gymnasium as gym
26
+
27
+ HAS_GYM = True
28
+ except ImportError:
29
+ HAS_GYM = False
30
+
31
+ # Table ↔ Grid mapping
32
+ TABLE_TO_GRID = {"@": "S", "_": "F", "#": "H", "*": "G"}
33
+ GRID_TO_TABLE = {v: k for k, v in TABLE_TO_GRID.items()}
34
+
35
+ MOVES = {"U": (-1, 0), "D": (1, 0), "L": (0, -1), "R": (0, 1)}
36
+ GYM_ACTION_MAP = {"L": 0, "D": 1, "R": 2, "U": 3}
37
+
38
+ GridDesc = List[str]
39
+
40
+
41
class FrozenLakeProcessor:
    """FrozenLake generation, BFS solving, rendering, and evaluation."""

    def __init__(self, img_size: int = 512):
        # Edge length in pixels of rendered square images.
        self.img_size = img_size
        # Color used for the drawn solution line and for path extraction.
        self.path_color = "red"

    # ==================== Generation ====================

    def generate(
        self,
        size: int,
        p: float = 0.8,
        min_path_len: int = 1,
        max_attempts: int = 500,
    ) -> Tuple[GridDesc, List[Tuple[int, int]]]:
        """
        Generate a solvable FrozenLake grid with shortest path >= *min_path_len* moves.

        Rejection-samples random layouts until one is solvable with a
        sufficiently long shortest path, or *max_attempts* is exhausted.

        Returns:
            (desc, path) — desc is list[str], path is list[(r,c)].

        Raises:
            RuntimeError: if no qualifying layout is found.
        """
        for _ in range(max_attempts):
            desc = self._random_layout(size, p)
            path = self.solve(desc)
            # path includes both endpoints, so moves = len(path) - 1.
            if path is not None and (len(path) - 1) >= min_path_len:
                return desc, path
        raise RuntimeError(
            f"Failed after {max_attempts} attempts "
            f"(size={size}, p={p}, min_path_len={min_path_len})."
        )

    @staticmethod
    def _random_layout(size: int, p: float = 0.8) -> GridDesc:
        """Random grid with one S and one G at random positions.

        Each remaining cell is frozen with probability *p*, a hole otherwise.
        """
        all_coords = [(r, c) for r in range(size) for c in range(size)]
        start, goal = random.sample(all_coords, 2)
        grid = []
        for r in range(size):
            row = []
            for c in range(size):
                if (r, c) == start:
                    row.append("S")
                elif (r, c) == goal:
                    row.append("G")
                else:
                    row.append("F" if random.random() < p else "H")
            grid.append("".join(row))
        return grid

    # ==================== Solving (plain BFS — fast) ====================

    @staticmethod
    def solve(desc: GridDesc) -> Optional[List[Tuple[int, int]]]:
        """
        BFS shortest path from S to G, avoiding H.

        ~100× faster than networkx for typical grid sizes because it avoids
        Python-level graph object construction entirely.

        Returns:
            List of (r, c) or None.
        """
        rows, cols = len(desc), len(desc[0])
        start = goal = None
        for r in range(rows):
            for c in range(cols):
                if desc[r][c] == "S":
                    start = (r, c)
                elif desc[r][c] == "G":
                    goal = (r, c)
        if start is None or goal is None:
            return None

        visited = [[False] * cols for _ in range(rows)]
        visited[start[0]][start[1]] = True
        # Queue holds (cell, path-so-far); BFS guarantees shortest path.
        queue: deque = deque([(start, [start])])

        while queue:
            (r, c), path = queue.popleft()
            if (r, c) == goal:
                return path
            for dr, dc in ((-1, 0), (1, 0), (0, -1), (0, 1)):
                nr, nc = r + dr, c + dc
                if 0 <= nr < rows and 0 <= nc < cols and not visited[nr][nc]:
                    ch = desc[nr][nc]
                    if ch != "H":
                        visited[nr][nc] = True
                        queue.append(((nr, nc), path + [(nr, nc)]))
        return None

    # ==================== Path ↔ UDRL ====================

    @staticmethod
    def path_to_udrl(path: List[Tuple[int, int]]) -> str:
        """Convert coordinate path to UDRL string.

        Assumes consecutive cells are 4-adjacent (as produced by solve()).
        """
        moves = []
        for i in range(len(path) - 1):
            r1, c1 = path[i]
            r2, c2 = path[i + 1]
            if r2 < r1:
                moves.append("U")
            elif r2 > r1:
                moves.append("D")
            elif c2 < c1:
                moves.append("L")
            else:
                moves.append("R")
        return "".join(moves)

    # ==================== Verification ====================

    def verify_path_sim(self, desc: GridDesc, udrl: str) -> bool:
        """Verify UDRL via grid simulation (no dependencies).

        Non-UDRL characters are ignored; stepping out of bounds or into a
        hole fails immediately; success is reaching G (mid-string or at end).
        """
        rows, cols = len(desc), len(desc[0])
        start = self.find_start(desc)
        if start is None:
            return False

        r, c = start
        # Tolerate separators and an "Action plan" preamble in model output.
        clean = udrl.replace(",", "").replace(" ", "").strip()
        if "Action plan" in clean:
            clean = clean.rsplit("Action plan", 1)[-1]

        for ch in clean:
            if ch not in MOVES:
                continue
            dr, dc = MOVES[ch]
            nr, nc = r + dr, c + dc
            if not (0 <= nr < rows and 0 <= nc < cols):
                return False
            cell = desc[nr][nc]
            if cell == "H":
                return False
            r, c = nr, nc
            if cell == "G":
                return True
        return desc[r][c] == "G"

    def verify_path_gym(self, desc: GridDesc, udrl: str) -> bool:
        """Verify via gymnasium (falls back to sim if unavailable).

        Runs the deterministic (non-slippery) FrozenLake-v1 env and checks
        the episode terminates with positive reward.
        """
        if not HAS_GYM:
            return self.verify_path_sim(desc, udrl)
        rows, cols = len(desc), len(desc[0])
        try:
            env = gym.make(
                "FrozenLake-v1", desc=desc,
                map_name=f"{rows}x{cols}", is_slippery=False, render_mode=None,
            )
            env.reset(seed=42)
            success = False
            clean = udrl.replace(",", "").replace(" ", "").strip()
            if "Action plan" in clean:
                clean = clean.rsplit("Action plan", 1)[-1]
            for ch in clean:
                if ch not in GYM_ACTION_MAP:
                    continue
                _, reward, terminated, truncated, _ = env.step(GYM_ACTION_MAP[ch])
                if terminated or truncated:
                    # Positive reward on termination means the goal was reached.
                    success = reward > 0
                    break
            env.close()
            return success
        except Exception:
            # Any env failure degrades gracefully to the pure-Python check.
            return self.verify_path_sim(desc, udrl)

    # ==================== Table Text I/O ====================

    def encode_table(self, desc: GridDesc) -> str:
        """Encode to pipe-delimited table format (S/F/H/G → @/_/#/*)."""
        size = len(desc)
        lines = ["| | " + " | ".join(f"Col {i+1}" for i in range(size)) + " |"]
        for r in range(size):
            mapped = [GRID_TO_TABLE[ch] for ch in desc[r]]
            lines.append(f"| Row {r+1} | " + " | ".join(mapped) + " |")
        return "\n".join(lines)

    def decode_table(self, text: str) -> Optional[GridDesc]:
        """Parse table text back to GridDesc.

        Lenient: skips header/separator lines and any cell characters that
        are not in the table alphabet. Returns None on failure or empty input.
        """
        try:
            rows = []
            for line in text.strip().splitlines():
                line = line.strip()
                if not line or "Col" in line or "---" in line:
                    continue
                parts = [p.strip() for p in line.split("|")]
                clean = [p for p in parts if p]
                if len(clean) < 2:
                    continue
                # clean[0] is the "Row N" label; the rest are cells.
                row_str = "".join(
                    TABLE_TO_GRID[ch] for ch in clean[1:] if ch in TABLE_TO_GRID
                )
                if row_str:
                    rows.append(row_str)
            return rows if rows else None
        except Exception:
            return None

    def save_table(self, filepath: str, desc: GridDesc) -> None:
        """Write the encoded table to *filepath*."""
        with open(filepath, "w") as f:
            f.write(self.encode_table(desc))

    def load_table(self, filepath: str) -> Optional[GridDesc]:
        """Read and decode a table file; None if missing or unparseable."""
        try:
            with open(filepath) as f:
                return self.decode_table(f.read())
        except Exception:
            return None

    def find_start(self, desc: GridDesc) -> Optional[Tuple[int, int]]:
        """Return the (row, col) of the first 'S' cell, or None."""
        for r, row in enumerate(desc):
            for c, ch in enumerate(row):
                if ch == "S":
                    return (r, c)
        return None

    def fingerprint(self, desc: GridDesc) -> str:
        """Canonical string identity of a layout (rows concatenated)."""
        return "".join(desc)

    # ==================== Rendering ====================

    def render_gym(self, desc: GridDesc) -> Optional[Image.Image]:
        """Render via gymnasium (creates a pygame window — slow)."""
        if not HAS_GYM:
            return None
        try:
            env = gym.make(
                "FrozenLake-v1", desc=desc,
                is_slippery=False, render_mode="rgb_array",
            )
            env.reset()
            rgb = env.render()
            env.close()
            # NEAREST keeps cell edges crisp when upscaling.
            return Image.fromarray(rgb).resize(
                (self.img_size, self.img_size), Image.NEAREST
            )
        except Exception:
            return None

    def render_simple(self, desc: GridDesc) -> Image.Image:
        """Fast PIL-only renderer (no pygame dependency)."""
        size = len(desc)
        cell = self.img_size // size
        img = Image.new("RGB", (self.img_size, self.img_size), (255, 255, 255))
        draw = ImageDraw.Draw(img)
        # Cell fill colors per grid character.
        colors = {
            "S": (0, 0, 255), "F": (200, 220, 255),
            "H": (80, 80, 80), "G": (0, 200, 0),
        }
        for r in range(size):
            for c in range(size):
                x0, y0 = c * cell, r * cell
                draw.rectangle(
                    [x0, y0, x0 + cell - 1, y0 + cell - 1],
                    fill=colors.get(desc[r][c], (200, 220, 255)),
                )
        # Grid lines on top of the filled cells.
        for i in range(size + 1):
            draw.line([(i * cell, 0), (i * cell, self.img_size)], fill="black", width=1)
            draw.line([(0, i * cell), (self.img_size, i * cell)], fill="black", width=1)
        return img

    def render(self, desc: GridDesc, use_gym: bool = True) -> Image.Image:
        """Render with gym when requested and available, else PIL fallback."""
        if use_gym:
            img = self.render_gym(desc)
            if img is not None:
                return img
        return self.render_simple(desc)

    def draw_solution_line(
        self, image: Image.Image, path: List[Tuple[int, int]], grid_size: int,
    ) -> Image.Image:
        """Draw red line on *image* (modifies in-place)."""
        draw = ImageDraw.Draw(image)
        w, h = image.size
        cw, ch_ = w / grid_size, h / grid_size
        # Line passes through the center of each cell on the path.
        pts = [(c * cw + cw / 2, r * ch_ + ch_ / 2) for r, c in path]
        draw.line(pts, fill=self.path_color, width=max(1, int(cw / 4)), joint="curve")
        return image

    # ==================== Video Frames ====================

    def generate_video_frames(
        self,
        desc: GridDesc,
        path: List[Tuple[int, int]],
        n_start: int = 5,
        m_end: int = 5,
        frames: Optional[int] = None,
        use_gym: bool = True,
    ) -> List[Image.Image]:
        """
        Progressive red-line video frames.

        *frames* controls content frames between holds:
        None → 1 per step, >steps → slow-mo, <steps → fast-fwd.
        """
        size = len(desc)
        n_steps = len(path) - 1
        base_img = self.render(desc, use_gym=use_gym)

        # Degenerate path: just hold the static board.
        if n_steps <= 0:
            return [base_img] * (n_start + m_end + 1)

        content = frames if frames is not None else n_steps
        content = max(1, content)
        result: List[Image.Image] = []

        # Opening hold
        result.extend([base_img.copy() for _ in range(n_start)])

        def _partial(steps: int) -> Image.Image:
            # Board with the first *steps* moves of the path drawn.
            return self.draw_solution_line(base_img.copy(), path[: steps + 1], size)

        if content == n_steps:
            # One frame per path step.
            for s in range(1, n_steps + 1):
                result.append(_partial(s))
        elif content > n_steps:
            # Slow motion: duplicate frames so each step covers its share
            # of the content-frame budget.
            for s in range(1, n_steps + 1):
                lo = (s - 1) * content // n_steps
                hi = s * content // n_steps
                frame = _partial(s)
                result.append(frame)
                for _ in range(hi - lo - 1):
                    result.append(frame.copy())
        else:
            # Fast forward: sample *content* evenly-spaced partial paths.
            for f in range(content):
                result.append(_partial((f + 1) * n_steps // content))

        # Closing hold
        final = _partial(n_steps)
        result.extend([final.copy() for _ in range(m_end)])
        return result

    # ==================== Red-Path Extraction ====================

    def extract_path_from_pixels(
        self,
        pixels: np.ndarray,
        rows: int,
        cols: int,
        start: Tuple[int, int],
        desc: Optional[GridDesc] = None,
        pixel_threshold: float = 0.01,
    ) -> str:
        """Detect red path in RGB array, return UDRL.

        A cell counts as "on the path" when more than *pixel_threshold* of
        its pixels are predominantly red. The path is then recovered by a
        greedy walk from *start*; NOTE(review): the greedy walk can
        dead-end on self-crossing or branching red marks — it is not a
        full graph search.
        """
        img = Image.fromarray(pixels)
        w, h = img.size
        px = np.array(img, dtype=float)
        r_ch, g_ch, b_ch = px[:, :, 0], px[:, :, 1], px[:, :, 2]
        # "Red" = bright red channel clearly dominating green and blue.
        red_mask = (r_ch > 100) & (r_ch > g_ch * 1.2) & (r_ch > b_ch * 1.2)

        cell_h, cell_w = h // rows, w // cols
        path_grid = np.zeros((rows, cols), dtype=bool)
        for r in range(rows):
            for c in range(cols):
                sub = red_mask[r * cell_h : (r + 1) * cell_h,
                               c * cell_w : (c + 1) * cell_w]
                if sub.size > 0 and np.mean(sub) > pixel_threshold:
                    path_grid[r, c] = True

        # Greedy walk
        visited = {start}
        cr, cc = start
        actions: List[str] = []
        # Step cap guards against cycles in a noisy mask.
        for _ in range(rows * cols * 2):
            found = False
            for act, (dr, dc) in [("R", (0, 1)), ("D", (1, 0)), ("L", (0, -1)), ("U", (-1, 0))]:
                nr, nc = cr + dr, cc + dc
                if 0 <= nr < rows and 0 <= nc < cols:
                    if path_grid[nr, nc] and (nr, nc) not in visited:
                        visited.add((nr, nc))
                        actions.append(act)
                        cr, cc = nr, nc
                        found = True
                        break
            if not found:
                break
        return "".join(actions)

    def extract_path_from_image(
        self, img_path: str, rows: int, cols: int, start: Tuple, desc=None,
    ) -> str:
        """Extract UDRL from an image file (empty string on any failure)."""
        try:
            pixels = np.array(Image.open(img_path).convert("RGB"))
            return self.extract_path_from_pixels(pixels, rows, cols, start, desc)
        except Exception:
            return ""
+
430
+
431
+ if __name__ == "__main__":
432
+ import time
433
+
434
+ proc = FrozenLakeProcessor(img_size=512)
435
+
436
+ # Benchmark BFS vs problem sizes
437
+ for sz in [8, 16, 32, 64]:
438
+ t0 = time.perf_counter()
439
+ count = 0
440
+ for _ in range(100):
441
+ desc = proc._random_layout(sz, p=0.8)
442
+ path = proc.solve(desc)
443
+ if path:
444
+ count += 1
445
+ elapsed = time.perf_counter() - t0
446
+ print(f"Size {sz:3d}: 100 BFS solves in {elapsed:.3f}s "
447
+ f"({count} solvable, {elapsed/100*1000:.1f}ms/solve)")
448
+
449
+ # Functional test
450
+ desc, path = proc.generate(size=16, p=0.8, min_path_len=20)
451
+ udrl = proc.path_to_udrl(path)
452
+ print(f"\nGenerate 16×16: path={len(path)}, UDRL={udrl[:40]}...")
453
+ print(f"Verify (sim): {proc.verify_path_sim(desc, udrl)}")
454
+
455
+ # Table round-trip
456
+ decoded = proc.decode_table(proc.encode_table(desc))
457
+ assert decoded == desc
458
+ print("Table round-trip: ✓")
459
+
460
+ # Render + extract round-trip
461
+ img = proc.render(desc, use_gym=False)
462
+ sol = proc.draw_solution_line(img.copy(), path, len(desc))
463
+ start = proc.find_start(desc)
464
+ extracted = proc.extract_path_from_pixels(np.array(sol), len(desc), len(desc[0]), start)
465
+ print(f"Extract round-trip verify: {proc.verify_path_sim(desc, extracted)}")
466
+ print("All tests passed ✓")
maze/data_process.py ADDED
@@ -0,0 +1,651 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Maze Video Dataset Generator — generates maze puzzle images and solution videos
3
+ with checkpoint/resume support, train/test splitting, and JSONL metadata.
4
+
5
+ Includes an ``eval`` subcommand that takes a directory of result videos,
6
+ extracts the last frame from each, parses the red path, and verifies it
7
+ against the ground-truth maze text files.
8
+
9
+ Usage:
10
+ # Generate
11
+ python maze_video_gen.py generate --output-dir maze --sizes 8 16 32 \
12
+ --num-per-size 100 500 1000 --min-path-ratio 0.3 \
13
+ --n-start 5 --m-end 5 --frames 50 --fps 10 --seed 42
14
+
15
+ # Evaluate result videos
16
+ python maze_video_gen.py eval result_videos/ --text-dir maze/texts
17
+
18
+ # Verify a pre-extracted JSON
19
+ python maze_video_gen.py verify results.json --text-dir maze/texts
20
+ """
21
+ import json
22
+ import csv
23
+ import hashlib
24
+ import random
25
+ import re
26
+ import argparse
27
+ from dataclasses import dataclass, asdict
28
+ from pathlib import Path
29
+ from typing import Dict, List, Optional
30
+
31
+ import cv2
32
+ import numpy as np
33
+ from tqdm import tqdm
34
+
35
+ from maze_processor import MazeProcessor
36
+
37
+
38
+ # ==================== Checkpoint Management ====================
39
+
40
@dataclass
class GenerationState:
    """Serializable snapshot of generation progress for checkpoint/resume.

    params_hash:       hash of the generation parameters this run used
    size_progress:     mazes generated so far, keyed by maze size
    seen_fingerprints: fingerprints of emitted mazes (deduplication)
    all_samples:       metadata records for every emitted sample
    completed:         True once the full dataset has been written
    """
    params_hash: str
    size_progress: Dict[int, int]
    seen_fingerprints: List[str]
    all_samples: List[Dict]
    completed: bool = False

    def to_dict(self) -> Dict:
        """Plain-dict form suitable for ``json.dump``."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: Dict) -> "GenerationState":
        """Rebuild a state object from its :meth:`to_dict` form."""
        return cls(**d)
55
+
56
+
57
def _params_hash(params: Dict) -> str:
    """Deterministic 12-char hash of the generation parameters.

    ``output_dir`` is excluded so moving/renaming the dataset directory
    does not invalidate an existing checkpoint.
    """
    relevant = dict(params)
    relevant.pop("output_dir", None)
    canonical = json.dumps(relevant, sort_keys=True).encode()
    return hashlib.md5(canonical).hexdigest()[:12]
61
+
62
+
63
def load_checkpoint(output_dir: Path, params: Dict) -> Optional[GenerationState]:
    """Return the saved generation state, or None when absent or stale.

    A checkpoint is only reused when its parameter hash matches *params*;
    otherwise generation must restart from scratch.
    """
    meta_path = output_dir / "metadata.json"
    if not meta_path.exists():
        return None
    with open(meta_path) as fh:
        payload = json.load(fh)
    state = GenerationState.from_dict(payload["state"])

    expected = _params_hash(params)
    if state.params_hash != expected:
        print(f"⚠️ Parameters changed ({state.params_hash} → {expected}), starting fresh")
        return None
    if state.completed:
        print("✓ Generation already completed")
        return state

    done = sum(state.size_progress.values())
    print(f"✓ Resuming from checkpoint: {done} mazes generated")
    return state
81
+
82
+
83
def save_checkpoint(output_dir: Path, state: GenerationState, params: Dict):
    """Atomically persist the checkpoint: write a temp file, then rename."""
    target = output_dir / "metadata.json"
    scratch = target.with_suffix(".tmp")
    payload = {"params": params, "state": state.to_dict()}
    with open(scratch, "w") as fh:
        json.dump(payload, fh, indent=2)
    # rename() is atomic on POSIX, so readers never see a half-written file
    scratch.rename(target)
90
+
91
+
92
+ # ==================== Video I/O ====================
93
+
94
def save_video_cv2(frames: list, path: str, fps: int = 10):
    """Encode a list of PIL Images as an mp4 (mp4v codec) at *fps*."""
    height, width = np.array(frames[0]).shape[:2]
    writer = cv2.VideoWriter(
        str(path), cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
    )
    for img in frames:
        # OpenCV expects BGR channel order
        writer.write(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
    writer.release()
104
+
105
+
106
def extract_last_frame(video_path: str) -> Optional[np.ndarray]:
    """Return the final frame of *video_path* as an (H, W, 3) uint8 RGB array.

    None is returned when the file cannot be opened or decoded.
    """
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        return None

    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if n_frames > 0:
        # Seek straight to the last frame instead of decoding the whole clip
        cap.set(cv2.CAP_PROP_POS_FRAMES, n_frames - 1)

    ok, bgr = cap.read()
    cap.release()

    if not ok or bgr is None:
        return None
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
127
+
128
+
129
+ # ==================== Normalisation Helpers ====================
130
+
131
def _normalise_list(val, sizes, name="parameter"):
    """Broadcast a scalar int to len(sizes) entries, or validate a list.

    Raises ValueError when a sequence of the wrong length is supplied.
    """
    if isinstance(val, int):
        return [val for _ in sizes]
    if len(val) != len(sizes):
        raise ValueError(f"{name} length ({len(val)}) != sizes length ({len(sizes)})")
    return list(val)
138
+
139
+
140
+ # ==================== Core Dataset Generation ====================
141
+
142
def generate_dataset(
    output_dir: str = "maze",
    sizes: List[int] = (8, 16, 32),
    num_per_size: list = (100, 500, 1000),
    min_path_ratio: float = 0.3,
    img_size: int = 1024,
    prompt: str = "Draw a continuous red line from the yellow dot to the blue dot, avoiding all walls.",
    train_ratio: float = 0.9,
    n_start: int = 5,
    m_end: int = 5,
    frames: Optional[int] = None,
    fps: int = 10,
    seed: int = 42,
    checkpoint_interval: int = 50,
):
    """
    Generate maze video dataset with checkpoint/resume support.

    Sequence defaults are tuples (immutable) rather than lists, so no caller
    can mutate a shared default between calls; they are normalised to lists
    internally.

    The *frames* parameter controls content frames per video:
        - None → one content frame per path step (variable length)
        - N > 0 → exactly N content frames (slow-mo / fast-fwd as needed)

    Directory layout::

        output_dir/
            images/ — puzzle PNG (no solution line)
            videos/ — solution MP4 (progressive red line)
            texts/ — maze text files (bitmask format)
            train.jsonl / test.jsonl
            train.csv / test.csv
            path.json — UDRL answer key
            metadata.json — checkpoint state
    """
    # Normalise sequence parameters (tuples serialize identically to lists
    # in JSON, so the checkpoint parameter hash is unaffected).
    sizes = list(sizes)
    num_per_size = list(num_per_size)

    # Everything that affects output identity goes into the checkpoint hash.
    params = {
        "sizes": sizes, "num_per_size": num_per_size,
        "min_path_ratio": min_path_ratio, "img_size": img_size,
        "prompt": prompt, "train_ratio": train_ratio,
        "n_start": n_start, "m_end": m_end, "frames": frames,
        "fps": fps, "seed": seed,
    }

    out = Path(output_dir)
    img_dir = out / "images"
    vid_dir = out / "videos"
    txt_dir = out / "texts"
    for d in (img_dir, vid_dir, txt_dir):
        d.mkdir(parents=True, exist_ok=True)

    state = load_checkpoint(out, params)
    if state and state.completed:
        return

    # A single count broadcasts to every size.
    num_list = _normalise_list(
        num_per_size[0] if len(num_per_size) == 1 else num_per_size,
        sizes, "num_per_size",
    )
    max_puzzles = max(num_list)
    num_w = len(str(max_puzzles))  # zero-pad width for filenames
    proc = MazeProcessor(img_size=img_size)

    if state is None:
        random.seed(seed)
        state = GenerationState(
            params_hash=_params_hash(params),
            size_progress={sz: 0 for sz in sizes},
            seen_fingerprints=[],
            all_samples=[],
        )
        print(f"Starting fresh generation: sizes={sizes}, counts={num_list}")
        print(f" frames={'auto (1 per step)' if frames is None else frames}, "
              f"n_start={n_start}, m_end={m_end}, fps={fps}")
    else:
        # Best-effort RNG replay so a resumed run diverges less from a
        # fresh one (not an exact reconstruction of the RNG stream).
        random.seed(seed)
        for _ in range(sum(state.size_progress.values()) * 10):
            random.random()

    seen = set(state.seen_fingerprints)
    all_samples = list(state.all_samples)
    # JSON round-trips dict keys as strings; restore int keys.
    progress = {int(k): v for k, v in state.size_progress.items()}
    since_ckpt = 0

    total_target = sum(num_list)
    total_done = sum(progress.values())

    with tqdm(total=total_target, initial=total_done, desc="Total", unit="maze") as pbar:
        for maze_size, target in zip(sizes, num_list):
            generated = progress.get(maze_size, 0)
            if generated >= target:
                continue

            min_len = max(1, int(maze_size * maze_size * min_path_ratio))
            # Cap attempts so an unsatisfiable config cannot loop forever.
            max_attempts = (target - generated) * 20

            with tqdm(
                total=target, initial=generated, desc=f"Size {maze_size:3d}",
                unit="maze", leave=False,
            ) as pbar_sz:
                for _ in range(max_attempts):
                    if generated >= target:
                        break

                    try:
                        grid, start, end, path = proc.generate(
                            maze_size, min_path_len=min_len
                        )
                    except RuntimeError:
                        continue  # this attempt produced no qualifying maze

                    fp = proc.fingerprint(grid, start, end)
                    if fp in seen:
                        continue  # exact duplicate puzzle
                    seen.add(fp)

                    idx = generated
                    base = f"size{maze_size}_{idx:0{num_w}d}"
                    img_name = f"{base}.png"
                    vid_name = f"{base}.mp4"
                    txt_name = f"{base}.txt"

                    # Puzzle image (no solution overlay)
                    puzzle_img = proc.render(grid, start, end)
                    puzzle_img.save(str(img_dir / img_name))

                    # Solution video (progressively drawn red path)
                    vid_frames = proc.generate_video_frames(
                        grid, start, end, path,
                        n_start=n_start, m_end=m_end, frames=frames,
                    )
                    save_video_cv2(vid_frames, str(vid_dir / vid_name), fps=fps)

                    proc.save_text(str(txt_dir / txt_name), grid, start, end)

                    udrl = proc.path_to_udrl(path)

                    all_samples.append({
                        "prompt": prompt,
                        "image": img_name,
                        "video": vid_name,
                        "text": txt_name,
                        "maze_size": maze_size,
                        "start": list(start),
                        "end": list(end),
                        "path_udrl": udrl,
                        "path_length": len(path),
                        "frame_count": len(vid_frames),
                    })

                    generated += 1
                    progress[maze_size] = generated
                    since_ckpt += 1
                    pbar_sz.update(1)
                    pbar.update(1)

                    if since_ckpt >= checkpoint_interval:
                        state.size_progress = progress
                        state.seen_fingerprints = list(seen)
                        state.all_samples = all_samples
                        save_checkpoint(out, state, params)
                        since_ckpt = 0

            tqdm.write(
                f"Size {maze_size}: {generated} mazes, "
                f"{sum(1 for s in all_samples if s['maze_size'] == maze_size)} samples"
            )

    # ==================== Final outputs ====================

    # UDRL answer key keyed by puzzle image name
    path_answers = {s["image"]: s["path_udrl"] for s in all_samples}
    with open(out / "path.json", "w") as f:
        json.dump(dict(sorted(path_answers.items())), f, indent=4)

    # Deterministic shuffle (seed independent of the generation RNG state)
    random.seed(seed + 1)
    random.shuffle(all_samples)
    split = int(len(all_samples) * train_ratio)

    def _write_jsonl(samples, path):
        # One JSON object per line
        with open(path, "w") as f:
            for s in samples:
                f.write(json.dumps(s) + "\n")

    _write_jsonl(all_samples[:split], out / "train.jsonl")
    _write_jsonl(all_samples[split:], out / "test.jsonl")

    for name, samples in [("train", all_samples[:split]), ("test", all_samples[split:])]:
        with open(out / f"{name}.csv", "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["input_image", "video", "prompt"])
            for s in samples:
                writer.writerow([
                    f"images/{s['image']}", f"videos/{s['video']}", s["prompt"]
                ])

    state.size_progress = progress
    state.seen_fingerprints = list(seen)
    state.all_samples = all_samples
    state.completed = True
    save_checkpoint(out, state, params)

    print(f"\n✓ Dataset complete: {out}/")
    print(f" Sizes: {sizes}")
    print(f" Mazes: {len(all_samples)}")
    print(f" Train: {split}, Test: {len(all_samples) - split}")
    lengths = [s["path_length"] for s in all_samples]
    fcounts = [s["frame_count"] for s in all_samples]
    print(f" Path lengths: avg={np.mean(lengths):.1f}, "
          f"min={min(lengths)}, max={max(lengths)}")
    print(f" Frame counts: avg={np.mean(fcounts):.1f}, "
          f"min={min(fcounts)}, max={max(fcounts)}")
348
+
349
+
350
+ # ==================== Eval: Video → Last Frame → Verify ====================
351
+
352
def eval_videos(
    video_dir: str,
    text_dir: str,
    output_json: Optional[str] = None,
    gt_json: Optional[str] = None,
):
    """
    Evaluate a directory of result videos against ground-truth mazes.

    Pipeline per video:
        1. Extract last frame from .mp4
        2. Detect red path via pixel analysis
        3. Convert to UDRL action string
        4. Verify against maze .txt (wall-respecting walk from start to end)

    Matching convention:
        Video ``<stem>.mp4`` → Text ``<stem>.txt`` in *text_dir*.
        Common stems: ``size8_000``, ``size16_042``, etc.

    Each maze is loaded and verified exactly once; the result is reused for
    the overall summary and the per-size breakdown (the previous version
    re-read every .txt file and re-ran verification a second time).

    Args:
        video_dir: Directory containing result .mp4 files.
        text_dir: Directory containing ground-truth maze .txt files.
        output_json: Path to save extracted paths as JSON (default: video_dir/0_result.json).
        gt_json: Optional ground-truth answer JSON for accuracy by path length.
    """
    proc = MazeProcessor()
    vid_root = Path(video_dir)
    txt_root = Path(text_dir)

    if output_json is None:
        output_json = str(vid_root / "0_result.json")

    # Collect videos in natural (numeric-aware) order
    videos = sorted(
        vid_root.glob("*.mp4"),
        key=lambda p: [int(s) if s.isdigit() else s for s in re.split(r"(\d+)", p.stem)],
    )

    if not videos:
        print(f"No .mp4 files found in {vid_root}")
        return

    print(f"Found {len(videos)} result videos in {vid_root}")
    print(f"Text dir: {txt_root}")

    # --- Phase 1: extract paths and verify once per video ---
    extracted: Dict[str, str] = {}
    records: List[Dict] = []  # one per evaluated video: name/size/length/correct
    missing_txt = 0
    missing_frame = 0

    for vpath in tqdm(videos, desc="Extracting paths"):
        stem = vpath.stem  # e.g. "size8_000"
        txt_path = txt_root / f"{stem}.txt"

        if not txt_path.exists():
            missing_txt += 1
            continue

        maze = proc.load_text(str(txt_path))
        if maze is None:
            missing_txt += 1
            continue

        last_frame = extract_last_frame(str(vpath))
        if last_frame is None:
            missing_frame += 1
            continue

        udrl = proc.extract_path_from_pixels(
            last_frame,
            grid_raw=maze["grid_raw"],
            size=maze["size"],
            start=maze["start"],
        )
        name = f"{stem}.png"  # keyed by image name for consistency
        extracted[name] = udrl
        records.append({
            "name": name,
            "size": maze["size"],
            "length": len(udrl),
            "correct": proc.verify_path(maze["grid"], maze["start"], maze["end"], udrl),
        })

    # Save extracted paths
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(extracted, f, indent=4)
    print(f"\nExtracted paths saved to: {output_json}")

    # --- Phase 2: aggregate (verification already done above) ---
    total_valid = len(records)
    correct = sum(1 for r in records if r["correct"])
    correctly_solved = [
        {"name": r["name"], "length": r["length"]} for r in records if r["correct"]
    ]

    acc = (correct / total_valid * 100) if total_valid else 0

    print(f"\n{'=' * 50}")
    print("Evaluation Summary")
    print(f"{'=' * 50}")
    print(f"Total Videos : {len(videos)}")
    print(f"Missing .txt : {missing_txt}")
    print(f"Failed Frame Read : {missing_frame}")
    print(f"Evaluated : {total_valid}")
    print(f"Correctly Solved : {correct}")
    print(f"Accuracy : {acc:.2f}%")
    print(f"{'-' * 50}")

    # Breakdown by maze size (reuses per-video verdicts)
    size_stats: Dict[int, Dict[str, int]] = {}
    for r in records:
        stats = size_stats.setdefault(r["size"], {"total": 0, "correct": 0})
        stats["total"] += 1
        if r["correct"]:
            stats["correct"] += 1

    if size_stats:
        print("\nAccuracy by maze size:")
        for sz in sorted(size_stats):
            s = size_stats[sz]
            sz_acc = s["correct"] / s["total"] * 100 if s["total"] else 0
            print(f" Size {sz:3d}: {s['correct']:4d}/{s['total']:4d} ({sz_acc:.2f}%)")

    # Top longest correct
    correctly_solved.sort(key=lambda x: x["length"], reverse=True)
    if correctly_solved:
        print(f"\nTop 3 Longest Correct Paths:")
        for i, item in enumerate(correctly_solved[:3]):
            print(f" {i+1}. {item['name']} (length: {item['length']})")

    # Optional: compare with ground-truth JSON for path-length-binned accuracy
    if gt_json:
        _compare_with_gt(extracted, gt_json, txt_root, proc)

    print(f"{'=' * 50}")
496
+
497
+
498
def _compare_with_gt(
    extracted: Dict[str, str],
    gt_json_path: str,
    txt_root: Path,
    proc: MazeProcessor,
):
    """Report accuracy bucketed by ground-truth path length (decade bins)."""
    try:
        with open(gt_json_path) as fh:
            gt = json.load(fh)
    except Exception:
        print(f" Warning: could not load ground-truth JSON: {gt_json_path}")
        return

    # Bin label (e.g. " 10- 19") -> {total, correct}
    bins: Dict[str, Dict[str, int]] = {}
    for name, pred_udrl in extracted.items():
        if name not in gt:
            continue
        gt_len = len(gt[name])

        # Decade bin for the ground-truth path length
        decade = (gt_len // 10) * 10
        label = f"{decade:3d}-{decade + 9:3d}"
        tally = bins.setdefault(label, {"total": 0, "correct": 0})
        tally["total"] += 1

        stem = name.replace(".png", "")
        maze = proc.load_text(str(txt_root / f"{stem}.txt"))
        if maze and proc.verify_path(maze["grid"], maze["start"], maze["end"], pred_udrl):
            tally["correct"] += 1

    if bins:
        print("\nAccuracy by GT path length:")
        for label in sorted(bins):
            b = bins[label]
            pct = b["correct"] / b["total"] * 100 if b["total"] else 0
            print(f" Length {label}: {b['correct']:4d}/{b['total']:4d} ({pct:.2f}%)")
538
+
539
+
540
+ # ==================== Verify: Pre-extracted JSON ====================
541
+
542
def verify_results(json_file: str, text_dir: str):
    """
    Check pre-extracted UDRL predictions against ground-truth maze files.

    Args:
        json_file: Path to JSON with {name: udrl_string} predictions.
        text_dir: Directory containing maze .txt files.
    """
    proc = MazeProcessor()
    txt_root = Path(text_dir)

    with open(Path(json_file)) as fh:
        predictions = json.load(fh)

    correct = skipped = valid = 0

    for name, udrl in predictions.items():
        stem = name.replace(".png", "")
        maze = proc.load_text(str(txt_root / f"{stem}.txt"))
        if maze is None:
            skipped += 1
            continue
        valid += 1
        if proc.verify_path(maze["grid"], maze["start"], maze["end"], udrl):
            correct += 1

    acc = (correct / valid * 100) if valid else 0
    print(f"\n{'='*40}")
    print(f"Verification: {correct}/{valid} correct ({acc:.2f}%)")
    if skipped:
        print(f"Skipped: {skipped}")
    print(f"{'='*40}")
576
+
577
+
578
+ # ==================== CLI ====================
579
+
580
def parse_args():
    """Build the CLI (generate / eval / verify) and parse sys.argv."""
    parser = argparse.ArgumentParser(
        description="Maze video dataset: generate, eval, verify"
    )
    subparsers = parser.add_subparsers(dest="command", help="Sub-command")

    # --- generate ---
    gen_cmd = subparsers.add_parser("generate", help="Generate dataset")
    gen_cmd.add_argument("--output-dir", type=str, default="maze")
    gen_cmd.add_argument("--sizes", type=int, nargs="+", default=[8, 16, 24, 32])
    gen_cmd.add_argument("--num-per-size", type=int, nargs="+", default=[100, 500, 1000, 2000])
    gen_cmd.add_argument("--min-path-ratio", type=float, default=0.3,
                         help="Min path length as fraction of size²")
    gen_cmd.add_argument("--img-size", type=int, default=1024)
    gen_cmd.add_argument("--prompt", type=str,
                         default="Draw a continuous red line from the yellow dot "
                                 "to the blue dot, avoiding all walls.")
    gen_cmd.add_argument("--train-ratio", type=float, default=0.9)
    gen_cmd.add_argument("--n-start", type=int, default=2,
                         help="Hold frames at video start (blank puzzle)")
    gen_cmd.add_argument("--m-end", type=int, default=3,
                         help="Hold frames at video end (completed solution)")
    gen_cmd.add_argument("--frames", type=int, default=None,
                         help="Content frames per video (None=auto 1 per step)")
    gen_cmd.add_argument("--fps", type=int, default=10)
    gen_cmd.add_argument("--seed", type=int, default=42)
    gen_cmd.add_argument("--checkpoint-interval", type=int, default=50)

    # --- eval ---
    eval_cmd = subparsers.add_parser(
        "eval",
        help="Evaluate result videos (last frame → extract → verify)")
    eval_cmd.add_argument("video_dir", type=str,
                          help="Directory containing result .mp4 files")
    eval_cmd.add_argument("--text-dir", type=str, required=True,
                          help="Directory with ground-truth maze .txt files")
    eval_cmd.add_argument("--output-json", type=str, default=None,
                          help="Output JSON for extracted paths (default: video_dir/0_result.json)")
    eval_cmd.add_argument("--gt-json", type=str, default=None,
                          help="Optional ground-truth path.json for length-binned accuracy")

    # --- verify ---
    verify_cmd = subparsers.add_parser(
        "verify", help="Verify a pre-extracted JSON of UDRL paths")
    verify_cmd.add_argument("json_file", type=str)
    verify_cmd.add_argument("--text-dir", type=str, required=True,
                            help="Directory with maze .txt files")

    return parser.parse_args()
627
+
628
+
629
if __name__ == "__main__":
    cli_args = parse_args()
    command = cli_args.command

    if command == "generate":
        # Everything except the sub-command name maps onto generate_dataset()
        options = dict(vars(cli_args))
        options.pop("command")
        generate_dataset(**options)

    elif command == "eval":
        eval_videos(
            video_dir=cli_args.video_dir,
            text_dir=cli_args.text_dir,
            output_json=cli_args.output_json,
            gt_json=cli_args.gt_json,
        )

    elif command == "verify":
        verify_results(cli_args.json_file, cli_args.text_dir)

    else:
        # No sub-command given: show a short usage reminder
        print("Usage: python maze_video_gen.py {generate|eval|verify} [options]")
        print(" python maze_video_gen.py generate --help")
        print(" python maze_video_gen.py eval --help")
        print(" python maze_video_gen.py verify --help")
maze/maze_processor.py ADDED
@@ -0,0 +1,543 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MazeProcessor - Maze generation, solving, rendering, and video frame generation.
3
+
4
+ Mirrors the SudokuProcessor pattern: a single class encapsulating all maze logic
5
+ including DFS generation, BFS solving, image/video rendering, path verification,
6
+ and text serialization.
7
+ """
8
+ import random
9
+ from collections import deque
10
+ from typing import List, Tuple, Optional, Dict
11
+
12
+ import numpy as np
13
+ from PIL import Image, ImageDraw
14
+
15
# Wall bitmask encoding (matches text file format)
WALL_MASKS = {"N": 1, "S": 2, "W": 4, "E": 8}
# Opposite of each cardinal direction — used when clearing the shared wall
# stored on both sides of a passage.
OPPOSITE = {"N": "S", "S": "N", "E": "W", "W": "E"}
# UDRL action -> (row delta, col delta, wall that must be open to move).
MOVES = {
    "U": (-1, 0, "N"),
    "D": (1, 0, "S"),
    "L": (0, -1, "W"),
    "R": (0, 1, "E"),
}
# Direction -> (row delta, col delta) for neighbour iteration.
NEIGHBORS = {
    "N": (-1, 0),
    "S": (1, 0),
    "E": (0, 1),
    "W": (0, -1),
}


# ======================== Grid Type ========================
# grid[r][c] = {"N": bool, "S": bool, "W": bool, "E": bool}
# True => wall present, False => passage open

Grid = List[List[Dict[str, bool]]]
37
+
38
+
39
+ class MazeProcessor:
40
+ """Handles maze generation, solving, image rendering, and video frame creation."""
41
+
42
+ def __init__(self, img_size: int = 512):
43
+ self.img_size = img_size
44
+
45
+ # Rendering colours (RGB)
46
+ self.bg_color = "black"
47
+ self.cell_color = "white"
48
+ self.wall_color = "black"
49
+ self.grid_color = (224, 224, 224)
50
+ self.start_color = "yellow"
51
+ self.end_color = "blue"
52
+ self.path_color = "red"
53
+
54
+ # ==================== Generation (DFS) ====================
55
+
56
+ @staticmethod
57
+ def _empty_grid(n: int) -> Grid:
58
+ """Create an n×n grid with all walls present."""
59
+ return [
60
+ [{"N": True, "E": True, "S": True, "W": True} for _ in range(n)]
61
+ for _ in range(n)
62
+ ]
63
+
64
+ @staticmethod
65
+ def _remove_wall(grid: Grid, r: int, c: int, direction: str) -> None:
66
+ """Remove wall between (r,c) and its neighbour in *direction*."""
67
+ dr, dc = NEIGHBORS[direction]
68
+ grid[r][c][direction] = False
69
+ grid[r + dr][c + dc][OPPOSITE[direction]] = False
70
+
71
+ def generate(
72
+ self, size: int, min_path_len: int = 1, max_attempts: int = 200
73
+ ) -> Tuple[Grid, Tuple[int, int], Tuple[int, int], np.ndarray]:
74
+ """
75
+ Generate a perfect maze and a start/end pair whose shortest path
76
+ length >= *min_path_len*.
77
+
78
+ Returns:
79
+ (grid, start, end, path) where path is an (L, 2) int array.
80
+ """
81
+ for _ in range(max_attempts):
82
+ grid = self._gen_dfs(size)
83
+ nodes = [(r, c) for r in range(size) for c in range(size)]
84
+ start, end = random.sample(nodes, 2)
85
+ path = self.solve_bfs(grid, start, end)
86
+ if path is not None and len(path) >= min_path_len:
87
+ return grid, tuple(start), tuple(end), path
88
+ raise RuntimeError(
89
+ f"Failed to generate maze (size={size}, min_path_len={min_path_len}) "
90
+ f"after {max_attempts} attempts."
91
+ )
92
+
93
+ def _gen_dfs(self, n: int) -> Grid:
94
+ """Randomised DFS (iterative) to carve a perfect maze."""
95
+ grid = self._empty_grid(n)
96
+ visited = [[False] * n for _ in range(n)]
97
+ sr, sc = random.randrange(n), random.randrange(n)
98
+ visited[sr][sc] = True
99
+ stack = [(sr, sc)]
100
+
101
+ while stack:
102
+ r, c = stack[-1]
103
+ nbrs = []
104
+ for d, (dr, dc) in NEIGHBORS.items():
105
+ nr, nc = r + dr, c + dc
106
+ if 0 <= nr < n and 0 <= nc < n and not visited[nr][nc]:
107
+ nbrs.append((d, nr, nc))
108
+ if nbrs:
109
+ d, nr, nc = random.choice(nbrs)
110
+ self._remove_wall(grid, r, c, d)
111
+ visited[nr][nc] = True
112
+ stack.append((nr, nc))
113
+ else:
114
+ stack.pop()
115
+ return grid
116
+
117
+ # ==================== Solving (BFS) ====================
118
+
119
+ def solve_bfs(
120
+ self, grid: Grid, start: Tuple[int, int], end: Tuple[int, int]
121
+ ) -> Optional[np.ndarray]:
122
+ """BFS shortest path. Returns (L,2) int ndarray or None."""
123
+ n = len(grid)
124
+ q: deque = deque([(start, [start])])
125
+ visited = {start}
126
+
127
+ while q:
128
+ (r, c), path = q.popleft()
129
+ if (r, c) == end:
130
+ return np.array(path, dtype=int)
131
+ cell = grid[r][c]
132
+ for d, (dr, dc) in NEIGHBORS.items():
133
+ nr, nc = r + dr, c + dc
134
+ if (
135
+ 0 <= nr < n
136
+ and 0 <= nc < n
137
+ and not cell[d]
138
+ and (nr, nc) not in visited
139
+ ):
140
+ visited.add((nr, nc))
141
+ q.append(((nr, nc), path + [(nr, nc)]))
142
+ return None
143
+
144
+ # ==================== Path ↔ UDRL ====================
145
+
146
+ @staticmethod
147
+ def path_to_udrl(path) -> str:
148
+ """Convert coordinate path to UDRL string."""
149
+ moves = []
150
+ for i in range(len(path) - 1):
151
+ r1, c1 = path[i]
152
+ r2, c2 = path[i + 1]
153
+ if r2 < r1:
154
+ moves.append("U")
155
+ elif r2 > r1:
156
+ moves.append("D")
157
+ elif c2 < c1:
158
+ moves.append("L")
159
+ else:
160
+ moves.append("R")
161
+ return "".join(moves)
162
+
163
+ # ==================== Verification ====================
164
+
165
+ def verify_path(self, grid: Grid, start: Tuple, end: Tuple, udrl: str) -> bool:
166
+ """Verify that *udrl* is a wall-respecting walk from *start* to *end*."""
167
+ n = len(grid)
168
+ r, c = start
169
+ for ch in udrl.replace(",", "").replace(" ", "").strip():
170
+ if ch not in MOVES:
171
+ continue
172
+ dr, dc, wall = MOVES[ch]
173
+ if grid[r][c][wall]:
174
+ return False
175
+ nr, nc = r + dr, c + dc
176
+ if not (0 <= nr < n and 0 <= nc < n):
177
+ return False
178
+ r, c = nr, nc
179
+ return (r, c) == end
180
+
181
+ # ==================== Text Encoding ====================
182
+
183
+ def encode_grid(self, grid: Grid) -> str:
184
+ """Encode grid to compact bitmask string (one int per cell, row-major)."""
185
+ rows = []
186
+ for row in grid:
187
+ vals = []
188
+ for cell in row:
189
+ v = 0
190
+ for d, mask in WALL_MASKS.items():
191
+ if cell[d]:
192
+ v |= mask
193
+ vals.append(str(v))
194
+ rows.append(" ".join(vals))
195
+ return "\n".join(rows)
196
+
197
+ def decode_grid(self, text_lines: List[str]) -> Grid:
198
+ """Decode bitmask text lines back to grid dicts."""
199
+ grid = []
200
+ for line in text_lines:
201
+ row = []
202
+ for val_s in line.split():
203
+ val = int(val_s)
204
+ row.append({d: bool(val & mask) for d, mask in WALL_MASKS.items()})
205
+ grid.append(row)
206
+ return grid
207
+
208
+ def save_text(self, filepath, grid: Grid, start: Tuple, end: Tuple) -> None:
209
+ """Save maze to compact text file."""
210
+ n = len(grid)
211
+ with open(filepath, "w") as f:
212
+ f.write(f"{n}\n{start[0]} {start[1]}\n{end[0]} {end[1]}\n")
213
+ f.write(self.encode_grid(grid) + "\n")
214
+
215
+ def load_text(self, filepath) -> Optional[Dict]:
216
+ """
217
+ Load maze from text file.
218
+
219
+ Returns dict with keys: size, start, end, grid (dict-based),
220
+ grid_raw (list[list[int]] bitmask). None on failure.
221
+ """
222
+ try:
223
+ with open(filepath) as f:
224
+ lines = [l.strip() for l in f if l.strip()]
225
+ n = int(lines[0])
226
+ sr, sc = map(int, lines[1].split())
227
+ er, ec = map(int, lines[2].split())
228
+ grid = self.decode_grid(lines[3 : 3 + n])
229
+ grid_raw: List[List[int]] = []
230
+ for r in range(n):
231
+ grid_raw.append(list(map(int, lines[3 + r].split())))
232
+ return {
233
+ "size": n,
234
+ "start": (sr, sc),
235
+ "end": (er, ec),
236
+ "grid": grid,
237
+ "grid_raw": grid_raw,
238
+ }
239
+ except Exception:
240
+ return None
241
+
242
+ def fingerprint(self, grid: Grid, start: Tuple, end: Tuple) -> str:
243
+ """Content fingerprint for deduplication."""
244
+ n = len(grid)
245
+ parts = [f"{n},{start[0]},{start[1]},{end[0]},{end[1]}"]
246
+ for row in grid:
247
+ for cell in row:
248
+ v = sum(WALL_MASKS[d] for d in WALL_MASKS if cell[d])
249
+ parts.append(str(v))
250
+ return "|".join(parts)
251
+
252
+ # ==================== Image Rendering ====================
253
+
254
+ def _layout(self, n: int):
255
+ """Compute rendering layout parameters."""
256
+ cell_f = float(self.img_size) / n
257
+ wall_f = cell_f / 4.0
258
+ half_f = wall_f / 2.0
259
+ grid_w = max(1, int(cell_f / 16.0))
260
+ return cell_f, wall_f, half_f, grid_w
261
+
262
+ def render(
263
+ self,
264
+ grid: Grid,
265
+ start: Tuple[int, int],
266
+ end: Tuple[int, int],
267
+ path: Optional[np.ndarray] = None,
268
+ path_steps: Optional[int] = None,
269
+ ) -> Image.Image:
270
+ """
271
+ Render maze as a PIL image.
272
+
273
+ Args:
274
+ grid: The maze grid.
275
+ start, end: Coordinates of start/end cells.
276
+ path: Full solution path (L, 2).
277
+ path_steps: Draw only the first *path_steps* segments (for video).
278
+
279
+ Returns:
280
+ PIL.Image (RGB, img_size × img_size).
281
+ """
282
+ n = len(grid)
283
+ cell_f, wall_f, half_f, grid_w = self._layout(n)
284
+
285
+ img = Image.new("RGB", (self.img_size, self.img_size), self.bg_color)
286
+ draw = ImageDraw.Draw(img)
287
+
288
+ # --- fill cells & open passages ---
289
+ for r in range(n):
290
+ for c in range(n):
291
+ x1 = c * cell_f + half_f
292
+ y1 = r * cell_f + half_f
293
+ x2 = (c + 1) * cell_f - half_f
294
+ y2 = (r + 1) * cell_f - half_f
295
+ draw.rectangle([(x1, y1), (x2, y2)], fill=self.cell_color)
296
+ cell = grid[r][c]
297
+ if not cell["S"] and r < n - 1:
298
+ draw.rectangle(
299
+ [(x1, y2), (x2, y2 + wall_f)], fill=self.cell_color
300
+ )
301
+ if not cell["E"] and c < n - 1:
302
+ draw.rectangle(
303
+ [(x2, y1), (x2 + wall_f, y2)], fill=self.cell_color
304
+ )
305
+
306
+ # --- subtle grid lines on open passages ---
307
+ for r in range(n):
308
+ for c in range(n):
309
+ if r < n - 1 and not grid[r][c]["S"]:
310
+ y = (r + 1) * cell_f
311
+ draw.line(
312
+ [(c * cell_f + half_f, y), ((c + 1) * cell_f - half_f, y)],
313
+ fill=self.grid_color, width=grid_w,
314
+ )
315
+ if c < n - 1 and not grid[r][c]["E"]:
316
+ x = (c + 1) * cell_f
317
+ draw.line(
318
+ [(x, r * cell_f + half_f), (x, (r + 1) * cell_f - half_f)],
319
+ fill=self.grid_color, width=grid_w,
320
+ )
321
+
322
+ # --- start / end dots ---
323
+ def _dot(rc, color):
324
+ rr, cc = rc
325
+ cx = cc * cell_f + cell_f / 2
326
+ cy = rr * cell_f + cell_f / 2
327
+ rad = max(2, int((cell_f - wall_f) * 0.25))
328
+ draw.ellipse([cx - rad, cy - rad, cx + rad, cy + rad], fill=color)
329
+
330
+ _dot(start, self.start_color)
331
+ _dot(end, self.end_color)
332
+
333
+ # --- solution path (optionally partial) ---
334
+ if path is not None and len(path) >= 2:
335
+ end_idx = (
336
+ len(path) if path_steps is None
337
+ else min(path_steps + 1, len(path))
338
+ )
339
+ if end_idx >= 2:
340
+ pts = [
341
+ (c * cell_f + cell_f / 2, r * cell_f + cell_f / 2)
342
+ for r, c in path[:end_idx]
343
+ ]
344
+ draw.line(
345
+ pts, fill=self.path_color,
346
+ width=max(1, int(wall_f)), joint="curve",
347
+ )
348
+
349
+ return img
350
+
351
+ # ==================== Video Frame Generation ====================
352
+
353
+ def generate_video_frames(
354
+ self,
355
+ grid: Grid,
356
+ start: Tuple[int, int],
357
+ end: Tuple[int, int],
358
+ path: np.ndarray,
359
+ n_start: int = 5,
360
+ m_end: int = 5,
361
+ frames: Optional[int] = None,
362
+ ) -> List[Image.Image]:
363
+ """
364
+ Generate progressive video frames showing the red line growing.
365
+
366
+ *frames* controls the number of **content frames** between holds:
367
+ - None → 1 per path step
368
+ - frames > steps → slow-motion
369
+ - frames < steps → fast-forward
370
+
371
+ Total length = n_start + content_frames + m_end.
372
+ """
373
+ n_steps = len(path) - 1
374
+ if n_steps <= 0:
375
+ blank = self.render(grid, start, end)
376
+ return [blank] * (n_start + m_end + 1)
377
+
378
+ content_frames = frames if frames is not None else n_steps
379
+ content_frames = max(1, content_frames)
380
+
381
+ result: List[Image.Image] = []
382
+
383
+ # Opening hold
384
+ blank = self.render(grid, start, end)
385
+ result.extend([blank.copy() for _ in range(n_start)])
386
+
387
+ # Content frames
388
+ if content_frames == n_steps:
389
+ for step in range(1, n_steps + 1):
390
+ result.append(
391
+ self.render(grid, start, end, path=path, path_steps=step)
392
+ )
393
+ elif content_frames > n_steps:
394
+ for step in range(1, n_steps + 1):
395
+ f_lo = (step - 1) * content_frames // n_steps
396
+ f_hi = step * content_frames // n_steps
397
+ count = f_hi - f_lo
398
+ frame_img = self.render(
399
+ grid, start, end, path=path, path_steps=step
400
+ )
401
+ result.append(frame_img)
402
+ if count > 1:
403
+ result.extend([frame_img.copy() for _ in range(count - 1)])
404
+ else:
405
+ for f in range(content_frames):
406
+ step = (f + 1) * n_steps // content_frames
407
+ result.append(
408
+ self.render(grid, start, end, path=path, path_steps=step)
409
+ )
410
+
411
+ # Closing hold
412
+ final = self.render(grid, start, end, path=path)
413
+ result.extend([final.copy() for _ in range(m_end)])
414
+
415
+ return result
416
+
417
+ # ==================== Red-Path Extraction ====================
418
+
419
+ def extract_path_from_pixels(
420
+ self,
421
+ pixels: np.ndarray,
422
+ grid_raw: List[List[int]],
423
+ size: int,
424
+ start: Tuple[int, int],
425
+ pixel_threshold: float = 0.01,
426
+ ) -> str:
427
+ """
428
+ Detect red path in an RGB pixel array and return UDRL.
429
+
430
+ Uses **floating-point** cell boundaries matching the renderer to avoid
431
+ misalignment on sizes that don't evenly divide the image (e.g. 24, 48).
432
+
433
+ Args:
434
+ pixels: (H, W, 3) uint8 RGB array.
435
+ grid_raw: Bitmask grid as list[list[int]].
436
+ size: Maze dimension n.
437
+ start: Start coordinate (r, c).
438
+ pixel_threshold: Min red-pixel fraction to mark a cell.
439
+
440
+ Returns:
441
+ UDRL action string.
442
+ """
443
+ img = Image.fromarray(pixels)
444
+ w, h = img.size
445
+ px = np.array(img, dtype=float)
446
+
447
+ r_ch, g_ch, b_ch = px[:, :, 0], px[:, :, 1], px[:, :, 2]
448
+ red_mask = (r_ch > 100) & (r_ch > g_ch * 1.2) & (r_ch > b_ch * 1.2)
449
+
450
+ # Use FLOAT cell size to match render() coordinate system exactly.
451
+ # Integer division (h // size) drifts by up to (size-1) * fractional
452
+ # pixels, causing the last cells to be completely misaligned.
453
+ cell_h_f = h / size
454
+ cell_w_f = w / size
455
+
456
+ path_grid = np.zeros((size, size), dtype=bool)
457
+ for r in range(size):
458
+ y0 = int(round(r * cell_h_f))
459
+ y1 = int(round((r + 1) * cell_h_f))
460
+ for c in range(size):
461
+ x0 = int(round(c * cell_w_f))
462
+ x1 = int(round((c + 1) * cell_w_f))
463
+ # Small inward margin to avoid wall / neighbour bleed-over
464
+ ch = y1 - y0
465
+ cw = x1 - x0
466
+ margin_y = max(1, int(ch * 0.15))
467
+ margin_x = max(1, int(cw * 0.15))
468
+ sub = red_mask[y0 + margin_y : y1 - margin_y,
469
+ x0 + margin_x : x1 - margin_x]
470
+ if sub.size > 0 and np.mean(sub) > pixel_threshold:
471
+ path_grid[r, c] = True
472
+
473
+ # Greedy walk from start, respecting maze walls
474
+ directions = [
475
+ ("R", MOVES["R"]),
476
+ ("D", MOVES["D"]),
477
+ ("L", MOVES["L"]),
478
+ ("U", MOVES["U"]),
479
+ ]
480
+ visited = {start}
481
+ cr, cc = start
482
+ actions: List[str] = []
483
+ for _ in range(size * size * 2):
484
+ found = False
485
+ wval = grid_raw[cr][cc]
486
+ for act, (dr, dc, wall_ch) in directions:
487
+ nr, nc = cr + dr, cc + dc
488
+ if 0 <= nr < size and 0 <= nc < size:
489
+ if (wval & WALL_MASKS[wall_ch]) != 0:
490
+ continue
491
+ if path_grid[nr, nc] and (nr, nc) not in visited:
492
+ visited.add((nr, nc))
493
+ actions.append(act)
494
+ cr, cc = nr, nc
495
+ found = True
496
+ break
497
+ if not found:
498
+ break
499
+ return "".join(actions)
500
+
501
+ def extract_path_from_image(
502
+ self, img_path: str, grid_raw: List[List[int]], size: int, start: Tuple
503
+ ) -> str:
504
+ """Extract UDRL from an image file (convenience wrapper)."""
505
+ try:
506
+ pixels = np.array(Image.open(img_path).convert("RGB"))
507
+ return self.extract_path_from_pixels(pixels, grid_raw, size, start)
508
+ except Exception:
509
+ return ""
510
+
511
+
512
if __name__ == "__main__":
    proc = MazeProcessor(img_size=512)

    # Smoke test: generate one small maze and round-trip its solution.
    grid, start, end, path = proc.generate(size=8, min_path_len=10)
    n_steps = len(path) - 1
    print(f"Maze 8×8 | path length {len(path)} | steps {n_steps}")
    print(f"UDRL: {proc.path_to_udrl(path)}")
    print(f"Verify: {proc.verify_path(grid, start, end, proc.path_to_udrl(path))}")

    # Render with and without the solution overlay.
    proc.render(grid, start, end).save("test_maze.png")
    proc.render(grid, start, end, path=path).save("test_maze_solution.png")

    # Exercise all three video pacing modes and check frame counts.
    f1 = proc.generate_video_frames(grid, start, end, path, n_start=3, m_end=3)
    assert len(f1) == 3 + n_steps + 3

    f2 = proc.generate_video_frames(
        grid, start, end, path, n_start=3, m_end=3, frames=n_steps * 3
    )
    assert len(f2) == 3 + n_steps * 3 + 3

    half = max(1, n_steps // 2)
    f3 = proc.generate_video_frames(
        grid, start, end, path, n_start=3, m_end=3, frames=half
    )
    assert len(f3) == 3 + half + 3

    print(f"frames=None → {len(f1)} total ({n_steps} content)")
    print(f"frames={n_steps*3:<4d} → {len(f2)} total (slow-mo)")
    print(f"frames={half:<4d} → {len(f3)} total (fast-fwd)")
    print("All assertions passed ✓")
sudoku/generate_dataset.py CHANGED
@@ -1,6 +1,11 @@
1
  """
2
  Sudoku Video Dataset Generator - Supports flexible solution count expressions per puzzle.
3
  With checkpoint/resume support via metadata.json.
 
 
 
 
 
4
  """
5
  import json
6
  import re
@@ -8,7 +13,7 @@ import random
8
  import argparse
9
  from dataclasses import dataclass, asdict
10
  from pathlib import Path
11
- from typing import List, Tuple, Optional, Union, Dict, Any
12
  import numpy as np
13
  import cv2
14
  from tqdm import tqdm
@@ -22,7 +27,7 @@ class SolRange:
22
  """Flexible solution count constraint for puzzle generation."""
23
  min_sol: int
24
  max_sol: Optional[int]
25
-
26
  @classmethod
27
  def parse(cls, expr: str) -> "SolRange":
28
  expr = expr.strip()
@@ -46,7 +51,7 @@ class SolRange:
46
  if n < 1: raise ValueError(f"sol_num must be >= 1, got {n}")
47
  return cls(min_sol=n, max_sol=n)
48
  raise ValueError(f"Invalid sol_num expression: '{expr}'")
49
-
50
  @property
51
  def is_exact(self): return self.max_sol is not None and self.min_sol == self.max_sol
52
  @property
@@ -77,10 +82,10 @@ class GenerationState:
77
  seen_grids: List[str]
78
  all_samples: List[Dict]
79
  completed: bool = False
80
-
81
  def to_dict(self) -> Dict:
82
  return asdict(self)
83
-
84
  @classmethod
85
  def from_dict(cls, d: Dict) -> "GenerationState":
86
  return cls(**d)
@@ -89,9 +94,7 @@ class GenerationState:
89
  def compute_params_hash(params: Dict) -> str:
90
  """Compute hash of generation parameters for consistency check."""
91
  import hashlib
92
- # Only hash parameters that affect generation logic
93
- key_params = {k: v for k, v in params.items()
94
- if k not in ['output_dir']} # output_dir can differ
95
  return hashlib.md5(json.dumps(key_params, sort_keys=True).encode()).hexdigest()[:12]
96
 
97
 
@@ -100,21 +103,16 @@ def load_checkpoint(output_dir: Path, params: Dict) -> Optional[GenerationState]
100
  meta_path = output_dir / "metadata.json"
101
  if not meta_path.exists():
102
  return None
103
-
104
  with open(meta_path) as f:
105
  data = json.load(f)
106
-
107
  state = GenerationState.from_dict(data["state"])
108
  expected_hash = compute_params_hash(params)
109
-
110
  if state.params_hash != expected_hash:
111
  print(f"⚠️ Parameters changed (hash {state.params_hash} → {expected_hash}), starting fresh")
112
  return None
113
-
114
  if state.completed:
115
  print("✓ Generation already completed")
116
  return state
117
-
118
  print(f"✓ Resuming from checkpoint: {sum(state.clue_progress.values())} puzzles generated")
119
  return state
120
 
@@ -122,65 +120,136 @@ def load_checkpoint(output_dir: Path, params: Dict) -> Optional[GenerationState]
122
  def save_checkpoint(output_dir: Path, state: GenerationState, params: Dict):
123
  """Save current generation state to metadata.json."""
124
  meta_path = output_dir / "metadata.json"
125
- data = {
126
- "params": params,
127
- "state": state.to_dict()
128
- }
129
- # Atomic write
130
  tmp_path = meta_path.with_suffix('.tmp')
131
  with open(tmp_path, 'w') as f:
132
- json.dump(data, f, indent=2)
133
  tmp_path.rename(meta_path)
134
 
135
 
136
  # ==================== Core Functions ====================
137
 
138
  def get_fill_order(puzzle, solution):
 
139
  return [(i, j, solution[i][j]) for i in range(9) for j in range(9) if puzzle[i][j] == 0]
140
 
 
141
  def create_processor(resolution=None):
142
- if resolution is None: return SudokuProcessor()
 
 
143
  target_size = min(resolution)
144
  cell_size = target_size // 9
145
  sf = cell_size / 60
146
- return SudokuProcessor(cell_size=cell_size, font_scale=1.2*sf, thickness=max(1, int(2*sf)))
 
 
 
147
 
148
- def generate_video_frames(proc, puzzle, solution, n_start, m_end, k=1, max_frames=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  fills = get_fill_order(puzzle, solution)
150
  n_fills = len(fills)
151
- effective_k = k
152
- if max_frames is not None and n_start + n_fills * k + m_end > max_frames:
153
- avail = max_frames - n_start - m_end
154
- effective_k = max(1, avail // n_fills) if avail > 0 and n_fills > 0 else 1
155
-
156
- frames = []
 
 
 
157
  current = [row[:] for row in puzzle]
 
 
158
  img = proc.render(current)
159
- frames.extend([img.copy() for _ in range(n_start)])
160
-
161
- for r, c, v in fills:
162
- current[r][c] = v
163
- frames.append(proc.render(current, highlight_new=(r, c), original=puzzle))
164
- if effective_k > 1:
165
- img = proc.render(current, original=puzzle)
166
- frames.extend([img.copy() for _ in range(effective_k - 1)])
167
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  img = proc.render(solution, original=puzzle)
169
- frames.extend([img.copy() for _ in range(m_end)])
170
- if max_frames is not None and len(frames) > max_frames:
171
- frames = frames[:max_frames]
172
- return frames
173
 
174
  def save_video(frames, path, fps=10):
 
175
  h, w = frames[0].shape[:2]
176
  writer = cv2.VideoWriter(str(path), cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
177
- for f in frames: writer.write(cv2.cvtColor(f, cv2.COLOR_RGB2BGR))
 
178
  writer.release()
179
 
 
180
  def normalize_num_per_clue(num_per_clue, clue_levels):
181
- if isinstance(num_per_clue, int): return [num_per_clue] * len(clue_levels)
 
 
182
  if len(num_per_clue) != len(clue_levels):
183
- raise ValueError(f"num_per_clue length ({len(num_per_clue)}) != clue_levels ({len(clue_levels)})")
 
 
184
  return num_per_clue
185
 
186
 
@@ -191,7 +260,7 @@ def generate_puzzle_with_range(proc, clue, sol_range, min_hamming):
191
  if sol_range.is_unique_only:
192
  puzzle, solution = proc.generate(clue, unique=True)
193
  return puzzle, [solution]
194
-
195
  if sol_range.requires_multi:
196
  try:
197
  puzzle, solutions = proc.generate_multi_solution(
@@ -204,7 +273,7 @@ def generate_puzzle_with_range(proc, clue, sol_range, min_hamming):
204
  except RuntimeError:
205
  pass
206
  return None
207
-
208
  try:
209
  puzzle, solutions = proc.generate_multi_solution(
210
  clue, min_solutions=max(2, sol_range.min_sol),
@@ -215,7 +284,7 @@ def generate_puzzle_with_range(proc, clue, sol_range, min_hamming):
215
  return puzzle, solutions
216
  except RuntimeError:
217
  pass
218
-
219
  if sol_range.allows_unique:
220
  puzzle, solution = proc.generate(clue, unique=True)
221
  return puzzle, [solution]
@@ -225,45 +294,48 @@ def generate_puzzle_with_range(proc, clue, sol_range, min_hamming):
225
  # ==================== Dataset Generation ====================
226
 
227
  def generate_dataset(
228
- output_dir="sudoku_video", clue_levels=[30,40,50,60], num_per_clue=50,
229
- sol_num="1", min_hamming=10, train_ratio=0.8,
 
230
  prompt="Solve this Sudoku puzzle using red font.",
231
- n_start=10, m_end=10, k=1, max_frames=None, fps=10,
232
  resolution=None, seed=42, checkpoint_interval=50
233
  ):
234
  """
235
  Generate Sudoku video dataset with checkpoint/resume support.
236
-
 
 
 
 
237
  Args:
238
  checkpoint_interval: Save checkpoint every N puzzles (default: 50)
239
  """
240
- # Prepare params dict for hashing
241
  params = {
242
  "clue_levels": clue_levels, "num_per_clue": num_per_clue,
243
  "sol_num": sol_num, "min_hamming": min_hamming, "train_ratio": train_ratio,
244
- "prompt": prompt, "n_start": n_start, "m_end": m_end, "k": k,
245
- "max_frames": max_frames, "fps": fps, "resolution": resolution, "seed": seed
246
  }
247
-
248
  output_dir = Path(output_dir)
249
  video_dir = output_dir / "videos"
250
  image_dir = output_dir / "images"
251
  video_dir.mkdir(parents=True, exist_ok=True)
252
  image_dir.mkdir(parents=True, exist_ok=True)
253
-
254
  # Try to resume from checkpoint
255
  state = load_checkpoint(output_dir, params)
256
-
257
  if state and state.completed:
258
- return # Already done
259
-
260
  sol_range = SolRange.parse(str(sol_num))
261
  proc = create_processor(resolution)
262
  actual_size = proc.img_size
263
  num_per_clue_list = normalize_num_per_clue(num_per_clue, clue_levels)
264
  max_puzzles = max(num_per_clue_list)
265
  num_width = len(str(max_puzzles))
266
-
267
  # Initialize or restore state
268
  if state is None:
269
  random.seed(seed)
@@ -274,138 +346,162 @@ def generate_dataset(
274
  all_samples=[]
275
  )
276
  print(f"Starting fresh generation with solution range: {sol_range}")
 
 
277
  else:
278
- # Restore RNG state approximately by fast-forwarding
279
  random.seed(seed)
280
  for _ in range(sum(state.clue_progress.values()) * 10):
281
  random.random()
282
-
283
  seen_grids = set(state.seen_grids)
284
  all_samples = state.all_samples.copy()
285
  clue_progress = {int(k): v for k, v in state.clue_progress.items()}
286
-
287
  total_target = sum(num_per_clue_list)
288
  total_done = sum(clue_progress.values())
289
  stats_unique = sum(1 for s in all_samples if s["total_solutions"] == 1 and s["sol_idx"] == 0)
290
  stats_multi = sum(1 for s in all_samples if s["total_solutions"] > 1 and s["sol_idx"] == 0)
291
  puzzles_since_checkpoint = 0
292
-
293
  with tqdm(total=total_target, initial=total_done, desc="Total", unit="puzzle") as pbar_total:
294
  for clue, target_count in zip(clue_levels, num_per_clue_list):
295
  generated = clue_progress.get(clue, 0)
296
  if generated >= target_count:
297
- continue # This clue level is done
298
-
299
  max_attempts = (target_count - generated) * 20
300
-
301
- with tqdm(total=target_count, initial=generated, desc=f"Clue {clue:2d}",
302
  unit="puzzle", leave=False) as pbar_clue:
303
  for _ in range(max_attempts):
304
  if generated >= target_count:
305
  break
306
-
307
  result = generate_puzzle_with_range(proc, clue, sol_range, min_hamming)
308
  if result is None:
309
  continue
310
  puzzle, solutions = result
311
-
312
  fp = proc.encode(puzzle)
313
  if fp in seen_grids:
314
  continue
315
  seen_grids.add(fp)
316
-
317
  n_sols = len(solutions)
318
  if n_sols == 1:
319
  stats_unique += 1
320
  else:
321
  stats_multi += 1
322
-
323
  img_name = f"clue{clue}_{generated:0{num_width}d}.png"
324
  puzzle_img = proc.render(puzzle)
325
- cv2.imwrite(str(image_dir / img_name), cv2.cvtColor(puzzle_img, cv2.COLOR_RGB2BGR))
326
-
 
 
 
327
  for si, sol in enumerate(solutions):
328
  vid_name = f"clue{clue}_{generated:0{num_width}d}_sol{si}.mp4"
329
- frames = generate_video_frames(proc, puzzle, sol, n_start, m_end, k, max_frames)
330
- save_video(frames, video_dir / vid_name, fps)
331
-
332
- hdists = [proc._hamming(sol, solutions[j]) for j in range(n_sols) if j != si]
 
 
 
 
 
333
  all_samples.append({
334
  "prompt": prompt, "video": vid_name, "image": img_name,
335
  "clue": clue, "puzzle": fp, "solution": proc.encode(sol),
336
  "sol_idx": si, "total_solutions": n_sols,
337
- "frame_count": len(frames),
338
- "min_hamming_to_others": min(hdists) if hdists else 0
339
  })
340
-
341
  generated += 1
342
  clue_progress[clue] = generated
343
  puzzles_since_checkpoint += 1
344
  pbar_clue.update(1)
345
  pbar_total.update(1)
346
-
347
- # Periodic checkpoint
348
  if puzzles_since_checkpoint >= checkpoint_interval:
349
  state.clue_progress = clue_progress
350
  state.seen_grids = list(seen_grids)
351
  state.all_samples = all_samples
352
  save_checkpoint(output_dir, state, params)
353
  puzzles_since_checkpoint = 0
354
-
355
- tqdm.write(f"Clue {clue}: {generated} puzzles, "
356
- f"{sum(1 for s in all_samples if s['clue'] == clue)} videos")
357
-
 
 
358
  # Final output
359
- random.seed(seed + 1) # Deterministic shuffle
360
  random.shuffle(all_samples)
361
  split_idx = int(len(all_samples) * train_ratio)
362
-
363
  def write_jsonl(samples, path):
364
  with open(path, 'w') as f:
365
  for s in samples:
366
  json.dump(s, f)
367
  f.write('\n')
368
-
369
  write_jsonl(all_samples[:split_idx], output_dir / "train.jsonl")
370
  write_jsonl(all_samples[split_idx:], output_dir / "test.jsonl")
371
-
372
  # Mark as completed
373
  state.clue_progress = clue_progress
374
  state.seen_grids = list(seen_grids)
375
  state.all_samples = all_samples
376
  state.completed = True
377
  save_checkpoint(output_dir, state, params)
378
-
379
  print(f"\n✓ Dataset complete: {output_dir}/")
380
  print(f" Resolution: {actual_size}x{actual_size}")
381
  print(f" Solution range: {sol_range}")
382
  print(f" Puzzles: {len(seen_grids)} ({stats_unique} unique, {stats_multi} multi-sol)")
383
  print(f" Videos: {len(all_samples)}")
384
  print(f" Train: {split_idx}, Test: {len(all_samples) - split_idx}")
385
-
 
 
 
 
386
  hammings = [s["min_hamming_to_others"] for s in all_samples if s["min_hamming_to_others"] > 0]
387
  if hammings:
388
- print(f" Solution diversity: avg={np.mean(hammings):.1f}, min={min(hammings)}, max={max(hammings)}")
 
389
 
390
 
391
  def parse_resolution(s):
392
  w, h = map(int, s.lower().split('x'))
393
  return (w, h)
394
 
 
395
  def parse_args():
396
- p = argparse.ArgumentParser(description="Generate Sudoku video dataset with resume support")
 
 
397
  p.add_argument("--output-dir", type=str, default="sudoku")
398
- p.add_argument("--clue-levels", type=int, nargs="+", default=[20,30,40,50,60,70])
399
- p.add_argument("--num-per-clue", type=int, nargs="+", default=[15000,10000,10000,5000,2000,1000])
 
 
400
  p.add_argument("--sol-num", type=str, default="<=3",
401
  help="'1', '3', '>=1', '>1', '<=3', '<3', '2-5'")
402
  p.add_argument("--min-hamming", type=int, default=10)
403
  p.add_argument("--train-ratio", type=float, default=0.9)
404
- p.add_argument("--prompt", type=str, default="Solve this Sudoku puzzle using red font.")
405
- p.add_argument("--n-start", type=int, default=2)
406
- p.add_argument("--m-end", type=int, default=3)
407
- p.add_argument("--k", type=int, default=1)
408
- p.add_argument("--max-frames", type=int, default=None)
 
 
 
 
409
  p.add_argument("--fps", type=int, default=10)
410
  p.add_argument("--resolution", type=str, default="1024x1024")
411
  p.add_argument("--seed", type=int, default=42)
 
1
  """
2
  Sudoku Video Dataset Generator - Supports flexible solution count expressions per puzzle.
3
  With checkpoint/resume support via metadata.json.
4
+
5
+ The *frames* parameter replaces the old max_frames + k pair:
6
+ - frames=None → 1 content frame per fill step (variable length)
7
+ - frames=N → exactly N content frames; fills distributed evenly
8
+ (slow-motion if N > fills, fast-forward if N < fills)
9
  """
10
  import json
11
  import re
 
13
  import argparse
14
  from dataclasses import dataclass, asdict
15
  from pathlib import Path
16
+ from typing import List, Tuple, Optional, Dict
17
  import numpy as np
18
  import cv2
19
  from tqdm import tqdm
 
27
  """Flexible solution count constraint for puzzle generation."""
28
  min_sol: int
29
  max_sol: Optional[int]
30
+
31
  @classmethod
32
  def parse(cls, expr: str) -> "SolRange":
33
  expr = expr.strip()
 
51
  if n < 1: raise ValueError(f"sol_num must be >= 1, got {n}")
52
  return cls(min_sol=n, max_sol=n)
53
  raise ValueError(f"Invalid sol_num expression: '{expr}'")
54
+
55
  @property
56
  def is_exact(self): return self.max_sol is not None and self.min_sol == self.max_sol
57
  @property
 
82
  seen_grids: List[str]
83
  all_samples: List[Dict]
84
  completed: bool = False
85
+
86
  def to_dict(self) -> Dict:
87
  return asdict(self)
88
+
89
  @classmethod
90
  def from_dict(cls, d: Dict) -> "GenerationState":
91
  return cls(**d)
 
94
def compute_params_hash(params: Dict) -> str:
    """Compute a short hash of generation parameters for consistency checks.

    ``output_dir`` is excluded so the same generation run can resume from a
    different location without invalidating the checkpoint.
    """
    import hashlib
    relevant = {k: v for k, v in params.items() if k != 'output_dir'}
    digest = hashlib.md5(json.dumps(relevant, sort_keys=True).encode())
    return digest.hexdigest()[:12]
99
 
100
 
 
103
  meta_path = output_dir / "metadata.json"
104
  if not meta_path.exists():
105
  return None
 
106
  with open(meta_path) as f:
107
  data = json.load(f)
 
108
  state = GenerationState.from_dict(data["state"])
109
  expected_hash = compute_params_hash(params)
 
110
  if state.params_hash != expected_hash:
111
  print(f"⚠️ Parameters changed (hash {state.params_hash} → {expected_hash}), starting fresh")
112
  return None
 
113
  if state.completed:
114
  print("✓ Generation already completed")
115
  return state
 
116
  print(f"✓ Resuming from checkpoint: {sum(state.clue_progress.values())} puzzles generated")
117
  return state
118
 
 
120
def save_checkpoint(output_dir: Path, state: GenerationState, params: Dict):
    """Atomically persist the current generation state to metadata.json.

    Writes to a ``.tmp`` file first and renames it over the target, so a
    crash mid-write cannot leave a truncated metadata.json behind.
    """
    meta_path = output_dir / "metadata.json"
    tmp_path = meta_path.with_suffix('.tmp')
    payload = {"params": params, "state": state.to_dict()}
    with open(tmp_path, 'w') as fh:
        json.dump(payload, fh, indent=2)
    tmp_path.rename(meta_path)
127
 
128
 
129
  # ==================== Core Functions ====================
130
 
131
def get_fill_order(puzzle, solution):
    """Return (row, col, value) triples for every empty cell, row-major."""
    order = []
    for i in range(9):
        for j in range(9):
            if puzzle[i][j] == 0:
                order.append((i, j, solution[i][j]))
    return order
134
 
135
+
136
def create_processor(resolution=None):
    """Create a SudokuProcessor, scaling geometry to *resolution*.

    Args:
        resolution: Optional (width, height); the smaller dimension is
            divided into 9 cells and fonts/line widths scale accordingly.
            None uses the processor defaults.
    """
    if resolution is None:
        return SudokuProcessor()
    cell_size = min(resolution) // 9
    # 60 px is the processor's reference cell size for font/thickness.
    sf = cell_size / 60
    return SudokuProcessor(
        cell_size=cell_size, font_scale=1.2 * sf, thickness=max(1, int(2 * sf))
    )
146
+
147
 
148
def generate_video_frames(proc, puzzle, solution, n_start, m_end, frames=None):
    """
    Generate progressive video frames for a Sudoku solve.

    The *frames* parameter controls the number of **content frames**
    (between the opening and closing holds):

    - frames=None → 1 content frame per fill step (n_fills total)
    - frames > fills → multiple frames per fill step (slow-motion)
    - frames < fills → multiple fills per frame (fast-forward)
    - frames == fills → identical to None

    Total output length = n_start + content_frames + m_end.

    Args:
        proc: SudokuProcessor instance.
        puzzle: 9×9 puzzle grid (0 = empty).
        solution: 9×9 solved grid.
        n_start: Hold frames for the puzzle at the beginning.
        m_end: Hold frames for the completed solution at the end.
        frames: Desired number of content frames (None = one per fill).

    Returns:
        List of numpy arrays (RGB images).
    """
    fills = get_fill_order(puzzle, solution)
    n_fills = len(fills)

    if n_fills == 0:
        # Nothing to animate: hold the solved board the whole time.
        still = proc.render(solution, original=puzzle)
        return [still.copy() for _ in range(n_start + m_end + 1)]

    n_content = n_fills if frames is None else max(1, frames)

    out = []
    board = [row[:] for row in puzzle]

    # --- opening hold: untouched puzzle ---
    opening = proc.render(board)
    out.extend(opening.copy() for _ in range(n_start))

    # --- content frames ---
    if n_content == n_fills:
        # Exact 1:1 mapping.
        for r, c, v in fills:
            board[r][c] = v
            out.append(proc.render(board, highlight_new=(r, c), original=puzzle))

    elif n_content > n_fills:
        # Slow-motion: integer-division bounds distribute n_content frames
        # across the fills so the repeats sum to exactly n_content.
        for i, (r, c, v) in enumerate(fills):
            board[r][c] = v
            span = (i + 1) * n_content // n_fills - i * n_content // n_fills

            # First frame of this step shows the highlight.
            out.append(proc.render(board, highlight_new=(r, c), original=puzzle))
            # Remaining hold frames (no highlight).
            if span > 1:
                hold = proc.render(board, original=puzzle)
                out.extend(hold.copy() for _ in range(span - 1))

    else:
        # Fast-forward: each content frame absorbs several fills.
        for f in range(n_content):
            lo = f * n_fills // n_content
            hi = (f + 1) * n_fills // n_content
            last = None
            for r, c, v in fills[lo:hi]:
                board[r][c] = v
                last = (r, c)
            if last is not None:
                out.append(
                    proc.render(board, highlight_new=last, original=puzzle)
                )
            else:
                out.append(proc.render(board, original=puzzle))

    # --- closing hold: fully solved board ---
    closing = proc.render(solution, original=puzzle)
    out.extend(closing.copy() for _ in range(m_end))

    return out
234
+
235
 
236
def save_video(frames, path, fps=10):
    """Encode a list of numpy RGB frames as an mp4 file at *path*."""
    height, width = frames[0].shape[:2]
    writer = cv2.VideoWriter(
        str(path), cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)
    )
    # OpenCV expects BGR ordering.
    for frame in frames:
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    writer.release()
243
 
244
+
245
def normalize_num_per_clue(num_per_clue, clue_levels):
    """Broadcast a single int across all clue levels, or validate list length.

    Raises:
        ValueError: if a sequence is given whose length differs from
            ``clue_levels``.
    """
    if isinstance(num_per_clue, int):
        return [num_per_clue for _ in clue_levels]
    if len(num_per_clue) == len(clue_levels):
        return num_per_clue
    raise ValueError(
        f"num_per_clue length ({len(num_per_clue)}) != clue_levels ({len(clue_levels)})"
    )
254
 
255
 
 
260
  if sol_range.is_unique_only:
261
  puzzle, solution = proc.generate(clue, unique=True)
262
  return puzzle, [solution]
263
+
264
  if sol_range.requires_multi:
265
  try:
266
  puzzle, solutions = proc.generate_multi_solution(
 
273
  except RuntimeError:
274
  pass
275
  return None
276
+
277
  try:
278
  puzzle, solutions = proc.generate_multi_solution(
279
  clue, min_solutions=max(2, sol_range.min_sol),
 
284
  return puzzle, solutions
285
  except RuntimeError:
286
  pass
287
+
288
  if sol_range.allows_unique:
289
  puzzle, solution = proc.generate(clue, unique=True)
290
  return puzzle, [solution]
 
294
  # ==================== Dataset Generation ====================
295
 
296
  def generate_dataset(
297
+ output_dir="sudoku", clue_levels=[20, 30, 40, 50, 60, 70],
298
+ num_per_clue=[15000, 10000, 10000, 5000, 2000, 1000],
299
+ sol_num="<=3", min_hamming=10, train_ratio=0.9,
300
  prompt="Solve this Sudoku puzzle using red font.",
301
+ n_start=2, m_end=3, frames=None, fps=10,
302
  resolution=None, seed=42, checkpoint_interval=50
303
  ):
304
  """
305
  Generate Sudoku video dataset with checkpoint/resume support.
306
+
307
+ The *frames* parameter controls the number of **content frames** per video:
308
+ - None → one content frame per fill step (variable length per puzzle)
309
+ - N > 0 → exactly N content frames; fills distributed evenly
310
+
311
  Args:
312
  checkpoint_interval: Save checkpoint every N puzzles (default: 50)
313
  """
 
314
  params = {
315
  "clue_levels": clue_levels, "num_per_clue": num_per_clue,
316
  "sol_num": sol_num, "min_hamming": min_hamming, "train_ratio": train_ratio,
317
+ "prompt": prompt, "n_start": n_start, "m_end": m_end, "frames": frames,
318
+ "fps": fps, "resolution": resolution, "seed": seed
319
  }
320
+
321
  output_dir = Path(output_dir)
322
  video_dir = output_dir / "videos"
323
  image_dir = output_dir / "images"
324
  video_dir.mkdir(parents=True, exist_ok=True)
325
  image_dir.mkdir(parents=True, exist_ok=True)
326
+
327
  # Try to resume from checkpoint
328
  state = load_checkpoint(output_dir, params)
 
329
  if state and state.completed:
330
+ return
331
+
332
  sol_range = SolRange.parse(str(sol_num))
333
  proc = create_processor(resolution)
334
  actual_size = proc.img_size
335
  num_per_clue_list = normalize_num_per_clue(num_per_clue, clue_levels)
336
  max_puzzles = max(num_per_clue_list)
337
  num_width = len(str(max_puzzles))
338
+
339
  # Initialize or restore state
340
  if state is None:
341
  random.seed(seed)
 
346
  all_samples=[]
347
  )
348
  print(f"Starting fresh generation with solution range: {sol_range}")
349
+ print(f" frames={'auto (1 per fill)' if frames is None else frames}, "
350
+ f"n_start={n_start}, m_end={m_end}, fps={fps}")
351
  else:
 
352
  random.seed(seed)
353
  for _ in range(sum(state.clue_progress.values()) * 10):
354
  random.random()
355
+
356
  seen_grids = set(state.seen_grids)
357
  all_samples = state.all_samples.copy()
358
  clue_progress = {int(k): v for k, v in state.clue_progress.items()}
359
+
360
  total_target = sum(num_per_clue_list)
361
  total_done = sum(clue_progress.values())
362
  stats_unique = sum(1 for s in all_samples if s["total_solutions"] == 1 and s["sol_idx"] == 0)
363
  stats_multi = sum(1 for s in all_samples if s["total_solutions"] > 1 and s["sol_idx"] == 0)
364
  puzzles_since_checkpoint = 0
365
+
366
  with tqdm(total=total_target, initial=total_done, desc="Total", unit="puzzle") as pbar_total:
367
  for clue, target_count in zip(clue_levels, num_per_clue_list):
368
  generated = clue_progress.get(clue, 0)
369
  if generated >= target_count:
370
+ continue
371
+
372
  max_attempts = (target_count - generated) * 20
373
+
374
+ with tqdm(total=target_count, initial=generated, desc=f"Clue {clue:2d}",
375
  unit="puzzle", leave=False) as pbar_clue:
376
  for _ in range(max_attempts):
377
  if generated >= target_count:
378
  break
379
+
380
  result = generate_puzzle_with_range(proc, clue, sol_range, min_hamming)
381
  if result is None:
382
  continue
383
  puzzle, solutions = result
384
+
385
  fp = proc.encode(puzzle)
386
  if fp in seen_grids:
387
  continue
388
  seen_grids.add(fp)
389
+
390
  n_sols = len(solutions)
391
  if n_sols == 1:
392
  stats_unique += 1
393
  else:
394
  stats_multi += 1
395
+
396
  img_name = f"clue{clue}_{generated:0{num_width}d}.png"
397
  puzzle_img = proc.render(puzzle)
398
+ cv2.imwrite(
399
+ str(image_dir / img_name),
400
+ cv2.cvtColor(puzzle_img, cv2.COLOR_RGB2BGR),
401
+ )
402
+
403
  for si, sol in enumerate(solutions):
404
  vid_name = f"clue{clue}_{generated:0{num_width}d}_sol{si}.mp4"
405
+ vid_frames = generate_video_frames(
406
+ proc, puzzle, sol, n_start, m_end, frames
407
+ )
408
+ save_video(vid_frames, video_dir / vid_name, fps)
409
+
410
+ hdists = [
411
+ proc._hamming(sol, solutions[j])
412
+ for j in range(n_sols) if j != si
413
+ ]
414
  all_samples.append({
415
  "prompt": prompt, "video": vid_name, "image": img_name,
416
  "clue": clue, "puzzle": fp, "solution": proc.encode(sol),
417
  "sol_idx": si, "total_solutions": n_sols,
418
+ "frame_count": len(vid_frames),
419
+ "min_hamming_to_others": min(hdists) if hdists else 0,
420
  })
421
+
422
  generated += 1
423
  clue_progress[clue] = generated
424
  puzzles_since_checkpoint += 1
425
  pbar_clue.update(1)
426
  pbar_total.update(1)
427
+
 
428
  if puzzles_since_checkpoint >= checkpoint_interval:
429
  state.clue_progress = clue_progress
430
  state.seen_grids = list(seen_grids)
431
  state.all_samples = all_samples
432
  save_checkpoint(output_dir, state, params)
433
  puzzles_since_checkpoint = 0
434
+
435
+ tqdm.write(
436
+ f"Clue {clue}: {generated} puzzles, "
437
+ f"{sum(1 for s in all_samples if s['clue'] == clue)} videos"
438
+ )
439
+
440
  # Final output
441
+ random.seed(seed + 1)
442
  random.shuffle(all_samples)
443
  split_idx = int(len(all_samples) * train_ratio)
444
+
445
  def write_jsonl(samples, path):
446
  with open(path, 'w') as f:
447
  for s in samples:
448
  json.dump(s, f)
449
  f.write('\n')
450
+
451
  write_jsonl(all_samples[:split_idx], output_dir / "train.jsonl")
452
  write_jsonl(all_samples[split_idx:], output_dir / "test.jsonl")
453
+
454
  # Mark as completed
455
  state.clue_progress = clue_progress
456
  state.seen_grids = list(seen_grids)
457
  state.all_samples = all_samples
458
  state.completed = True
459
  save_checkpoint(output_dir, state, params)
460
+
461
  print(f"\n✓ Dataset complete: {output_dir}/")
462
  print(f" Resolution: {actual_size}x{actual_size}")
463
  print(f" Solution range: {sol_range}")
464
  print(f" Puzzles: {len(seen_grids)} ({stats_unique} unique, {stats_multi} multi-sol)")
465
  print(f" Videos: {len(all_samples)}")
466
  print(f" Train: {split_idx}, Test: {len(all_samples) - split_idx}")
467
+
468
+ fcounts = [s["frame_count"] for s in all_samples]
469
+ print(f" Frame counts: avg={np.mean(fcounts):.1f}, "
470
+ f"min={min(fcounts)}, max={max(fcounts)}")
471
+
472
  hammings = [s["min_hamming_to_others"] for s in all_samples if s["min_hamming_to_others"] > 0]
473
  if hammings:
474
+ print(f" Solution diversity: avg={np.mean(hammings):.1f}, "
475
+ f"min={min(hammings)}, max={max(hammings)}")
476
 
477
 
478
def parse_resolution(s):
    """Parse a 'WxH' string (case-insensitive, e.g. '1024x768') into (width, height)."""
    parts = s.lower().split('x')
    width, height = (int(p) for p in parts)
    return (width, height)
481
 
482
+
483
  def parse_args():
484
+ p = argparse.ArgumentParser(
485
+ description="Generate Sudoku video dataset with resume support"
486
+ )
487
  p.add_argument("--output-dir", type=str, default="sudoku")
488
+ p.add_argument("--clue-levels", type=int, nargs="+",
489
+ default=[20, 30, 40, 50, 60, 70])
490
+ p.add_argument("--num-per-clue", type=int, nargs="+",
491
+ default=[15000, 10000, 10000, 5000, 2000, 1000])
492
  p.add_argument("--sol-num", type=str, default="<=3",
493
  help="'1', '3', '>=1', '>1', '<=3', '<3', '2-5'")
494
  p.add_argument("--min-hamming", type=int, default=10)
495
  p.add_argument("--train-ratio", type=float, default=0.9)
496
+ p.add_argument("--prompt", type=str,
497
+ default="Solve this Sudoku puzzle using red font.")
498
+ p.add_argument("--n-start", type=int, default=2,
499
+ help="Hold frames for puzzle at video start")
500
+ p.add_argument("--m-end", type=int, default=3,
501
+ help="Hold frames for completed solution at video end")
502
+ p.add_argument("--frames", type=int, default=None,
503
+ help="Content frames per video. None=1 per fill (auto). "
504
+ "If > fills: slow-motion. If < fills: fast-forward.")
505
  p.add_argument("--fps", type=int, default=10)
506
  p.add_argument("--resolution", type=str, default="1024x1024")
507
  p.add_argument("--seed", type=int, default=42)
sudoku/jsonl_to_csv.py CHANGED
@@ -2,11 +2,11 @@ import json
2
  import csv
3
  from pathlib import Path
4
 
5
- dataset='sudoku'
6
- split='train'
7
 
8
  # Load test data
9
- with open(f'{dataset}/{split}_info.jsonl', 'r') as f:
10
  data = [json.loads(line) for line in f]
11
 
12
  # Write to CSV
@@ -19,4 +19,7 @@ with open(f'{dataset}/{split}.csv', 'w', newline='', encoding='utf-8') as f:
19
  'images/' + item['image'],
20
  'videos/' + item['video'],
21
  item['prompt'],
22
- ])
 
 
 
 
2
  import csv
3
  from pathlib import Path
4
 
5
+ dataset='sudoku_large'
6
+ split='test'
7
 
8
  # Load test data
9
+ with open(f'{dataset}/{split}.jsonl', 'r') as f:
10
  data = [json.loads(line) for line in f]
11
 
12
  # Write to CSV
 
19
  'images/' + item['image'],
20
  'videos/' + item['video'],
21
  item['prompt'],
22
+ ])
23
+
24
+ # Rename `{split}.jsonl' to `{split}_info.jsonl`
25
+ Path(f'{dataset}/{split}.jsonl').rename(Path(f'{dataset}/{split}_info.jsonl'))