| |
| |
| |
| |
| |
|
|
| import os |
| import cv2 |
| import torch |
| import numpy as np |
| import sys |
| import shutil |
| from datetime import datetime |
| import glob |
| import gc |
| import time |
| from pathlib import Path |
| from argparse import ArgumentParser |
| from tqdm import tqdm |
| from tqdm.contrib.concurrent import process_map |
|
|
| sys.path.append("vggt/") |
|
|
| from visual_util import predictions_to_glb |
| from vggt.models.vggt import VGGT |
| from vggt.utils.load_fn import load_and_preprocess_images |
| from vggt.utils.pose_enc import pose_encoding_to_extri_intri |
| from vggt.utils.geometry import unproject_depth_map_to_point_map |
|
|
| from rec_utils.datasets import ARKitDataset |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
| |
| |
def run_model(model, target_dir, device, max_images) -> dict:
    """
    Run the VGGT model on the ``*.jpg`` images found directly in ``target_dir``.

    When the folder holds more than ``max_images`` frames, the sorted file list
    is subsampled at evenly spaced positions so that exactly ``max_images``
    frames are fed to the model.

    Args:
        model: Callable invoked as ``model(images)``; must return a dict that
            contains at least the key ``"pose_enc"``.
        target_dir: Directory containing the input ``*.jpg`` frames
            (``str`` or ``pathlib.Path``).
        device: Device the preprocessed image batch is moved to
            (e.g. ``"cuda:0"``).
        max_images: Upper bound on the number of frames used.

    Returns:
        The prediction dict with every tensor value converted to a NumPy array
        (axis 0 squeezed away), plus the added keys ``"poses"``, ``"Ks"`` and
        ``"image_names"`` (paths of the frames actually used, as strings).

    Raises:
        ValueError: If CUDA is unavailable, if no images are found, or if the
            frames must be subsampled and ``max_images`` is smaller than 1.
    """
    print(f"Processing images from {target_dir}")

    if not torch.cuda.is_available():
        raise ValueError("CUDA is not available. Check your environment.")

    image_names = sorted(Path(target_dir).glob("*.jpg"))
    print(f"Found {len(image_names)} images")
    if not image_names:
        raise ValueError(f"No images found at {target_dir}. Check your upload.")
    if len(image_names) > max_images:
        if max_images < 1:
            raise ValueError(f"max_images must be at least 1, got {max_images}")
        print(f"Downsampling {len(image_names)} images to {max_images} images")
        # Evenly spaced indices over the sorted list, always including the
        # first frame (and the last one when max_images > 1).
        keep = np.linspace(0, len(image_names) - 1, max_images).round().astype(int)
        image_names = [image_names[i] for i in keep]

    images = load_and_preprocess_images(image_names).to(device)
    print(f"Preprocessed images shape: {images.shape}")

    print("Running inference...")
    # Pick the autocast dtype from the GPU that will actually run the model,
    # not from whichever CUDA device happens to be "current".
    torch_device = torch.device(device)
    capability_device = torch_device if torch_device.type == "cuda" else None
    major, _minor = torch.cuda.get_device_capability(capability_device)
    dtype = torch.bfloat16 if major >= 8 else torch.float16

    with torch.no_grad(), torch.autocast(device_type="cuda", dtype=dtype):
        predictions = model(images)

    print("Converting pose encoding to extrinsic and intrinsic matrices...")
    extrinsic, intrinsic = pose_encoding_to_extri_intri(predictions["pose_enc"], images.shape[-2:])
    predictions["poses"] = extrinsic
    predictions["Ks"] = intrinsic

    # Move every tensor to host memory and drop axis 0
    # (presumably the size-1 batch axis - TODO confirm against the model).
    for key, value in predictions.items():
        if isinstance(value, torch.Tensor):
            predictions[key] = value.cpu().numpy().squeeze(0)

    torch.cuda.empty_cache()
    predictions["image_names"] = [str(image_name) for image_name in image_names]
    return predictions
|
|
def process_scene(
    model,
    scene_name,
    input_dir,
    output_dir,
    device,
    max_images=10000,
    force=False,
):
    """
    Run inference for one scene and save it as ``output_dir/predictions.npz``.

    The scene is skipped when ``predictions.npz`` already exists, unless
    ``force`` is set. The archive is written to a temporary file first and
    renamed afterwards, so an interrupted run never leaves a truncated
    ``predictions.npz`` that a later run would mistake for a finished scene.

    Args:
        model: Model forwarded to ``run_model``.
        scene_name: Scene identifier, used only in log messages.
        input_dir: Directory with the scene's input frames, forwarded to
            ``run_model``.
        output_dir: Directory the archive is written to (``str`` or
            ``pathlib.Path``); created if missing.
        device: Device forwarded to ``run_model``.
        max_images: Maximum number of frames forwarded to ``run_model``.
        force: Recompute even if ``predictions.npz`` already exists.

    Returns:
        None.
    """
    output_dir = Path(output_dir)
    result_path = output_dir / "predictions.npz"
    if not force and result_path.exists():
        print(f"Skipping scene {scene_name} because it already exists")
        return

    start_time = time.time()
    gc.collect()
    torch.cuda.empty_cache()

    print("Running run_model...")
    with torch.no_grad():
        predictions = run_model(model, input_dir, device, max_images)

    # The input frames are not stored in the archive; tolerate the key being
    # absent instead of failing the whole scene with a KeyError.
    predictions.pop("images", None)

    output_dir.mkdir(parents=True, exist_ok=True)
    # Keep the ".npz" suffix on the temporary name so np.savez does not
    # append another one, then publish the finished file in a single rename.
    partial_path = output_dir / "predictions.partial.npz"
    np.savez(partial_path, **predictions)
    partial_path.replace(result_path)

    del predictions
    gc.collect()
    torch.cuda.empty_cache()

    elapsed = time.time() - start_time
    print(f"Scene {scene_name} processed in {elapsed:.2f} seconds (including IO)")
|
|
def main() -> None:
    """
    Command-line entry point: run VGGT on a set of scenes, one after another.

    Scenes come from ``--scene_names`` or, when that option is omitted, from
    the sorted directory listing of ``--input_dir``. The list is then sharded
    with ``--job_id`` / ``--job_num`` (job ``i`` takes every ``n``-th scene
    starting at index ``i``). A scene that raises is reported on stdout and its
    name is appended to a timestamped file under ``logs/``; processing then
    continues with the next scene.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--scene_names",
        nargs="+",
        default=None,
        help="Scenes to process (default: every entry found in --input_dir)",
    )
    parser.add_argument("--input_dir", type=str, default='/workspace-SR006.nfs2/datasets/arkitscenes/offline_prepared_data/posed_images/')
    parser.add_argument("--output_dir", type=str, default='output/arkit_250')
    parser.add_argument("--max_images", type=int, default=250)
    parser.add_argument("--conf_thres", type=float, default=3.0)
    parser.add_argument("--job_num", "-n", type=int, default=1)
    parser.add_argument("--job_id", "-i", type=int, default=0)
    parser.add_argument("--device", type=str, default="2")
    parser.add_argument("--force", action="store_true")
    args = parser.parse_args()

    if args.job_num < 1 or not 0 <= args.job_id < args.job_num:
        parser.error("--job_num must be >= 1 and --job_id must satisfy 0 <= job_id < job_num")

    # Resolve the scene list lazily from the directory the user actually
    # selected; sort it so every job shards the same ordering.
    all_scenes = args.scene_names
    if all_scenes is None:
        all_scenes = sorted(os.listdir(args.input_dir))
    scene_names = all_scenes[args.job_id::args.job_num]

    model = VGGT()
    _URL = "https://huggingface.co/facebook/VGGT-1B/resolve/main/model.pt"
    model.load_state_dict(torch.hub.load_state_dict_from_url(_URL))
    model.eval()

    device = f"cuda:{args.device}" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    errors_path = Path(f"logs/errors_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt")

    for scene_name in tqdm(scene_names):
        print(f"Processing scene {scene_name}")

        input_dir = Path(args.input_dir) / scene_name
        output_dir = Path(args.output_dir) / scene_name
        output_dir.mkdir(parents=True, exist_ok=True)
        try:
            process_scene(model, scene_name, input_dir, output_dir, device=device, max_images=args.max_images, force=args.force)
        except Exception as e:
            # Top-level boundary: one broken scene must not abort the batch.
            print(f"Error processing scene {scene_name}: {e}")
            errors_path.parent.mkdir(parents=True, exist_ok=True)
            # One scene name per line, so the file can be fed back in for a retry.
            with open(errors_path, "a") as f:
                f.write(f"{scene_name}\n")


if __name__ == "__main__":
    main()
|
|