"""Pre-extract COLMAP point clouds for all scenes and save as .npz files.

Run once before training to avoid slow COLMAP parsing at every iteration:

    python scripts/preextract_colmap_npz.py --root <path/to/scenes> [--normalize] [--workers 8]

Each scene directory is expected to contain a `sparse/0/` (or `sparse/`)
sub-directory with the standard COLMAP binary model files.

For every scene a file `colmap_points_cache.npz` (or
`colmap_points_cache_norm.npz` when --normalize is used) is written inside
the scene directory.  The InitializerColmap class will pick these files up
automatically and skip the full SceneManager parse.
"""

import argparse
import concurrent.futures
import os
import sys
import traceback
from pathlib import Path

import numpy as np

# Put the directory two levels above this file on sys.path so that the
# `optgs.*` import below resolves when this script is run directly.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(PROJECT_ROOT))

from optgs.dataset.colmap.utils import Parser


def _npz_path(scene_dir: Path, normalize: bool) -> Path:
    """Return the cache-file path that belongs to *scene_dir*.

    The file lives directly inside the scene directory.  Its name carries a
    ``_norm`` marker when *normalize* is true so that normalised and raw
    caches can coexist.
    """
    if normalize:
        filename = "colmap_points_cache_norm.npz"
    else:
        filename = "colmap_points_cache.npz"
    return scene_dir.joinpath(filename)


def process_scene(scene_dir: Path, normalize: bool, overwrite: bool) -> str:
    """Parse one COLMAP scene and store its point cloud as a compressed .npz.

    Args:
        scene_dir: Directory of a single scene; passed to ``Parser`` as
            ``data_dir`` and used as the location of the cache file.
        normalize: Forwarded to ``Parser`` and used to select the cache
            file name (see ``_npz_path``).
        overwrite: When false, an already existing cache file is left
            untouched and the scene is reported as skipped.

    Returns:
        A one-line status string starting with ``"SKIP"``, ``"OK"`` or
        ``"ERROR"``.  Errors are never raised to the caller; the message and
        traceback are embedded in the returned string instead.
    """
    npz = _npz_path(scene_dir, normalize)
    if npz.exists() and not overwrite:
        return f"SKIP  {scene_dir.name}"

    # Write to a temporary sibling first and rename it into place afterwards.
    # Otherwise an interrupted or failed write would leave a truncated file
    # at the final path, which the existence check above would treat as a
    # valid cache on the next run.
    tmp = npz.with_name(npz.name + ".tmp")
    try:
        parser = Parser(
            data_dir=str(scene_dir),
            factor=1,
            normalize=normalize,
            load_images=False,
            dl3dv_settings=False,
            verbose=False,
        )
        # A file object is passed (rather than a path) so that numpy does not
        # append ".npz" to the temporary file name.
        with open(tmp, "wb") as fh:
            np.savez_compressed(
                fh,
                points=parser.points,
                points_rgb=parser.points_rgb,
                camtoworlds=parser.camtoworlds,
            )
        os.replace(tmp, npz)
        return f"OK    {scene_dir.name}  ({parser.points.shape[0]} pts)"
    except Exception as e:
        return f"ERROR {scene_dir.name}: {e}\n{traceback.format_exc()}"
    finally:
        # After a successful replace the temporary file is already gone;
        # on any failure (including interruption) remove the leftover.
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            pass


def find_scene_dirs(root: Path) -> list[Path]:
    """Collect the immediate sub-directories of *root* that hold a COLMAP model.

    A child qualifies when it is a directory and contains either
    ``sparse/0`` or ``sparse``.  The result is ordered by sorted path.
    """

    def _has_sparse_model(candidate: Path) -> bool:
        # Prefer the nested "sparse/0" layout, fall back to plain "sparse".
        sparse_root = candidate / "sparse"
        return (sparse_root / "0").exists() or sparse_root.exists()

    return [
        entry
        for entry in sorted(root.iterdir())
        if entry.is_dir() and _has_sparse_model(entry)
    ]


def main():
    """Command-line entry point: extract caches for every scene under --root.

    Parses the arguments, discovers scene directories with
    ``find_scene_dirs``, runs ``process_scene`` for each of them in a process
    pool and prints one status line per scene followed by a summary.

    Exits with status 1 when the root is not an existing directory or when
    no scene directories are found; invalid ``--workers`` values are rejected
    through argparse.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--root", required=True, type=Path, help="Root directory containing one sub-dir per scene.")
    parser.add_argument("--normalize", action="store_true", help="Apply world-space normalisation (matches normalize_world_space: true in config).")
    parser.add_argument("--overwrite", action="store_true", help="Re-extract even if .npz already exists.")
    parser.add_argument("--workers", type=int, default=4, help="Number of parallel workers (default: 4).")
    args = parser.parse_args()

    # ProcessPoolExecutor raises ValueError for max_workers < 1; report that
    # as a regular usage error instead of a traceback.
    if args.workers < 1:
        parser.error("--workers must be a positive integer")

    root: Path = args.root.resolve()
    # is_dir() also rejects a path that exists but is a regular file, which
    # would otherwise fail later inside iterdir().
    if not root.is_dir():
        print(f"Root is not an existing directory: {root}", file=sys.stderr)
        sys.exit(1)

    scenes = find_scene_dirs(root)
    if not scenes:
        print(f"No COLMAP scene directories found under {root}", file=sys.stderr)
        sys.exit(1)

    print(f"Found {len(scenes)} scenes under {root}")
    print(f"normalize={args.normalize}  overwrite={args.overwrite}  workers={args.workers}\n")

    ok = skip = error = 0
    with concurrent.futures.ProcessPoolExecutor(max_workers=args.workers) as pool:
        futures = {pool.submit(process_scene, s, args.normalize, args.overwrite): s for s in scenes}
        for i, fut in enumerate(concurrent.futures.as_completed(futures), 1):
            try:
                msg = fut.result()
            except Exception as e:
                # process_scene catches its own exceptions, so this only
                # triggers when the worker itself failed (e.g. the process
                # died).  Count it as an error and keep reporting the rest.
                msg = f"ERROR {futures[fut].name}: {e}"
            # Status strings start with a tag padded to five characters.
            prefix = msg[:5].strip()
            if prefix == "OK":
                ok += 1
            elif prefix == "SKIP":
                skip += 1
            else:
                error += 1
            print(f"[{i}/{len(scenes)}] {msg}")

    print(f"\nDone. OK={ok}  skipped={skip}  errors={error}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()