|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
import cv2 |
|
|
import torch |
|
|
import numpy as np |
|
|
import sys |
|
|
import shutil |
|
|
from datetime import datetime |
|
|
import glob |
|
|
import gc |
|
|
import time |
|
|
from pathlib import Path |
|
|
from argparse import ArgumentParser |
|
|
from tqdm import tqdm |
|
|
from tqdm.contrib.concurrent import process_map |
|
|
|
|
|
sys.path.append("vggt/") |
|
|
|
|
|
from visual_util import predictions_to_glb |
|
|
from vggt.models.vggt import VGGT |
|
|
from vggt.utils.load_fn import load_and_preprocess_images |
|
|
from vggt.utils.pose_enc import pose_encoding_to_extri_intri |
|
|
from vggt.utils.geometry import unproject_depth_map_to_point_map |
|
|
|
|
|
from rec_utils.datasets import ARKitDataset |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_model(model, target_dir, device, max_images) -> dict:
    """
    Run the VGGT model on all .jpg images in `target_dir` and return predictions.

    Args:
        model: VGGT model, already on `device` and in eval mode.
        target_dir: Path to a directory containing the scene's .jpg frames.
        device: Torch device (e.g. "cuda:0") used for inference.
        max_images: Upper bound on frames; larger sets are uniformly subsampled.

    Returns:
        dict of model outputs converted to numpy arrays (leading batch dim
        squeezed), plus "poses" (extrinsics), "Ks" (intrinsics) and
        "image_names" (list of str paths actually used).

    Raises:
        ValueError: if CUDA is unavailable or no images are found.
    """
    print(f"Processing images from {target_dir}")

    if not torch.cuda.is_available():
        raise ValueError("CUDA is not available. Check your environment.")

    image_names = sorted(target_dir.glob("*.jpg"))
    print(f"Found {len(image_names)} images")
    if len(image_names) == 0:
        raise ValueError(f"No images found at {target_dir}. Check your upload.")
    if len(image_names) > max_images:
        print(f"Downsampling {len(image_names)} images to {max_images} images")
        # Uniformly spaced subset over the (sorted) sequence, keeping the
        # first and last frames.
        keep = np.linspace(0, len(image_names) - 1, max_images).round().astype(int)
        image_names = [image_names[i] for i in keep]

    images = load_and_preprocess_images(image_names).to(device)
    print(f"Preprocessed images shape: {images.shape}")

    print("Running inference...")
    # bfloat16 requires compute capability >= 8.0 (Ampere+); otherwise fp16.
    dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16

    # NOTE: torch.cuda.amp.autocast is deprecated; the device-agnostic
    # torch.autocast API is the supported spelling and behaves identically.
    with torch.no_grad():
        with torch.autocast(device_type="cuda", dtype=dtype):
            predictions = model(images)

    print("Converting pose encoding to extrinsic and intrinsic matrices...")
    extrinsic, intrinsic = pose_encoding_to_extri_intri(predictions["pose_enc"], images.shape[-2:])
    predictions["poses"] = extrinsic
    predictions["Ks"] = intrinsic

    # Move every tensor output to CPU numpy and drop the leading batch dim.
    for key in predictions.keys():
        if isinstance(predictions[key], torch.Tensor):
            predictions[key] = predictions[key].cpu().numpy().squeeze(0)

    torch.cuda.empty_cache()
    predictions["image_names"] = [str(image_name) for image_name in image_names]
    return predictions
|
|
|
|
|
def process_scene(
    model,
    scene_name,
    input_dir,
    output_dir,
    device,
    max_images=10000,
    force=False
):
    """
    Reconstruct one scene with VGGT and save the predictions to disk.

    Args:
        model: VGGT model, already on `device` and in eval mode.
        scene_name: Scene identifier, used only for logging.
        input_dir: Path to the directory with the scene's .jpg frames.
        output_dir: Path where "predictions.npz" is written; must exist.
        device: Torch device (e.g. "cuda:0") used for inference.
        max_images: Upper bound on frames passed to run_model.
        force: If True, re-run even when "predictions.npz" already exists.
    """
    if not force and (output_dir / "predictions.npz").exists():
        print(f"Skipping scene {scene_name} because it already exists")
        return

    start_time = time.time()
    # Reclaim host and GPU memory left over from the previous scene.
    gc.collect()
    torch.cuda.empty_cache()

    print("Running run_model...")
    with torch.no_grad():
        predictions = run_model(model, input_dir, device, max_images)

    # The preprocessed images are large and reproducible from disk;
    # don't persist them in the .npz.
    del predictions["images"]

    np.savez(output_dir / "predictions.npz", **predictions)

    # Free memory before the caller moves on to the next scene.
    del predictions
    gc.collect()
    torch.cuda.empty_cache()

    # Fix: the original computed end_time but never used it, silently
    # dropping the timing it was meant to report.
    elapsed = time.time() - start_time
    print(f"Scene {scene_name} processed in {elapsed:.1f}s")
|
|
|
|
|
if __name__ == "__main__": |
|
|
parser = ArgumentParser() |
|
|
parser.add_argument("--scene_names", nargs="+", default=os.listdir("/workspace-SR006.nfs2/datasets/arkitscenes/offline_prepared_data/posed_images/")) |
|
|
parser.add_argument("--input_dir", type=str, default='/workspace-SR006.nfs2/datasets/arkitscenes/offline_prepared_data/posed_images/') |
|
|
parser.add_argument("--output_dir", type=str, default='output/arkit_250') |
|
|
parser.add_argument("--max_images", type=int, default=250) |
|
|
parser.add_argument("--conf_thres", type=float, default=3.0) |
|
|
parser.add_argument("--job_num", "-n", type=int, default=1) |
|
|
parser.add_argument("--job_id", "-i", type=int, default=0) |
|
|
parser.add_argument("--device", type=str, default="2") |
|
|
parser.add_argument("--force", action="store_true") |
|
|
args = parser.parse_args() |
|
|
|
|
|
model = VGGT() |
|
|
_URL = "https://huggingface.co/facebook/VGGT-1B/resolve/main/model.pt" |
|
|
model.load_state_dict(torch.hub.load_state_dict_from_url(_URL)) |
|
|
model.eval() |
|
|
|
|
|
scene_names = args.scene_names[args.job_id::args.job_num] |
|
|
scene_names = ['47334096'] |
|
|
device = f"cuda:{args.device}" if torch.cuda.is_available() else "cpu" |
|
|
|
|
|
model = model.to(device) |
|
|
from datetime import datetime |
|
|
errors_path = Path(f"logs/errors_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt") |
|
|
|
|
|
for scene_name in tqdm(scene_names): |
|
|
print(f"Processing scene {scene_name}") |
|
|
|
|
|
input_dir = Path(args.input_dir) / scene_name |
|
|
output_dir = Path(args.output_dir) / scene_name |
|
|
output_dir.mkdir(parents=True, exist_ok=True) |
|
|
try: |
|
|
process_scene(model, scene_name, input_dir, output_dir, device=device, max_images=args.max_images, force=args.force) |
|
|
except Exception as e: |
|
|
print(f"Error processing scene {scene_name}: {e}") |
|
|
errors_path.parent.mkdir(parents=True, exist_ok=True) |
|
|
with open(errors_path, "a") as f: |
|
|
f.write(f"{scene_name}\n") |
|
|
|