Spaces:

chenwang
/

physctrl

Running on Zero

App Files Files Community

physctrl / src /utils /load_utils.py

chenwang

update

4724018 16 days ago

raw

history blame contribute delete

10 kB

	import numpy as np
	import torch
	import gc
	from PIL import Image
	import sys
	import os

	# Add the project root directory to Python path (use absolute paths for robustness)
	project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
	sys.path.append(project_root)
	sys.path.append(os.path.join(project_root, "libs"))
	sys.path.append(os.path.join(project_root, "libs", "LGM"))
	sys.path.append(os.path.join(project_root, "libs", "das"))
	sys.path.append(os.path.join(project_root, "src"))

	from sv3d.diffusers_sv3d import SV3DUNetSpatioTemporalConditionModel, StableVideo3DDiffusionPipeline
	from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
	from diffusers import AutoencoderKL, EulerDiscreteScheduler, DDPMScheduler, DDIMScheduler
	from diffusers.utils import export_to_gif, export_to_video
	from kiui.cam import orbit_camera
	from safetensors.torch import load_file
	from omegaconf import OmegaConf

	from LGM.core.models import LGM
	from LGM.core.options import AllConfigs
	from LGM.core.gs import GaussianRenderer
	from .track_utils.visualize_tracks import visualize_tracks
	from .track_utils.preprocessing import track_first, find_and_remove_nearest_point
	from .interpolate import interpolate_points
	from das.models.pipelines import DiffusionAsShaderPipeline

	import h5py
	import tyro
	from tqdm import tqdm
	from options import TestingConfig
	from pipeline_traj import TrajPipeline
	from model.spacetime import MDM_ST
	from argparse import Namespace

	def load_sv3d_pipeline(device, model_path="chenguolin/sv3d-diffusers"):
	    """Assemble the SV3D pipeline from the sub-models stored under ``model_path``.

	    Args:
	        device: Target passed to the pipeline's ``.to(...)``.
	        model_path: Identifier handed to every ``from_pretrained`` call; each
	            component is read from its own ``subfolder``.

	    Returns:
	        The ``StableVideo3DDiffusionPipeline`` after being moved to ``device``.
	    """

	    def _pretrained(component_cls, subfolder):
	        # Every component lives in a dedicated subfolder of the same model path.
	        return component_cls.from_pretrained(model_path, subfolder=subfolder)

	    # Load order: unet, vae, scheduler, image_encoder, feature_extractor.
	    components = {
	        "unet": _pretrained(SV3DUNetSpatioTemporalConditionModel, "unet"),
	        "vae": _pretrained(AutoencoderKL, "vae"),
	        "scheduler": _pretrained(EulerDiscreteScheduler, "scheduler"),
	        "image_encoder": _pretrained(CLIPVisionModelWithProjection, "image_encoder"),
	        "feature_extractor": _pretrained(CLIPImageProcessor, "feature_extractor"),
	    }
	    sv3d = StableVideo3DDiffusionPipeline(**components)
	    return sv3d.to(device)

	def load_LGM(opt, device, lgm_ckpt_path="./checkpoints/lgm_fp16.safetensors"):
	    """Instantiate LGM, load its safetensors weights and return it in eval mode.

	    Args:
	        opt: Options object forwarded unchanged to the ``LGM`` constructor.
	        device: Device the half-precision model is moved to.
	        lgm_ckpt_path: Path of the safetensors checkpoint (read on the CPU first).

	    Returns:
	        The ``LGM`` model, converted to half precision, on ``device``, in eval mode.
	    """
	    lgm = LGM(opt)
	    weights = load_file(lgm_ckpt_path, device='cpu')
	    # strict=False: keys that do not line up with the checkpoint are tolerated.
	    lgm.load_state_dict(weights, strict=False)
	    lgm = lgm.half().to(device)
	    lgm.eval()
	    return lgm

	def load_diffusion(device, model_cfg_path, diffusion_ckpt_path, seed=0):
	    """Build the trajectory diffusion pipeline from a config file and a checkpoint.

	    Args:
	        device: Device the ``MDM_ST`` model is moved to.
	        model_cfg_path: Config file loaded with OmegaConf and merged on top of
	            the structured ``TestingConfig`` schema.
	        diffusion_ckpt_path: Path of the safetensors checkpoint (read on the CPU).
	        seed: Currently unused; kept so existing callers keep working.

	    Returns:
	        A ``TrajPipeline`` wrapping the frozen model and a DDIM scheduler.
	    """
	    schema = OmegaConf.structured(TestingConfig)
	    cfg = OmegaConf.merge(schema, OmegaConf.load(model_cfg_path))

	    model = MDM_ST(
	        cfg.pc_size,
	        cfg.train_dataset.n_training_frames,
	        n_feats=3,
	        model_config=cfg.model_config,
	    ).to(device)

	    ckpt = load_file(diffusion_ckpt_path, device='cpu')
	    # strict=False: keys that do not line up with the checkpoint are tolerated.
	    model.load_state_dict(ckpt, strict=False)
	    # Inference only: eval mode and no gradients on any parameter.
	    model.eval().requires_grad_(False)

	    noise_scheduler = DDIMScheduler(num_train_timesteps=1000, prediction_type='sample', clip_sample=False)
	    return TrajPipeline(model=model, scheduler=noise_scheduler)

	def _upsample_trajectory(keyframes, n_frames=49):
	    """Double the frame rate of ``keyframes`` and pad one frame at each end.

	    Odd output frames (1, 3, ...) hold the key frames, even interior frames
	    hold the midpoint of the two neighbouring key frames, and the first/last
	    output frames repeat their neighbour. ``n_frames`` must be odd; only the
	    first ``(n_frames - 1) // 2`` key frames are used.
	    """
	    n_keys = (n_frames - 1) // 2
	    if keyframes.shape[0] < n_keys:
	        raise IndexError(
	            f"need at least {n_keys} key frames to build {n_frames} frames, "
	            f"got {keyframes.shape[0]}"
	        )
	    keys = keyframes[:n_keys]
	    dense = np.zeros((n_frames,) + keys.shape[1:])
	    dense[1:2 * n_keys:2] = keys
	    dense[2:2 * n_keys - 1:2] = (keys[:-1] + keys[1:]) / 2
	    dense[0] = dense[1]
	    dense[-1] = dense[-2]
	    return dense


	def _project_to_pixels(points, projection_matrix, crop_info):
	    """Project 3D ``points`` with ``projection_matrix`` and shift by the crop offset.

	    Points are made homogeneous, multiplied by the matrix, divided by their last
	    component, and the first two coordinates are mapped from [-1, 1] to a
	    ``res`` x ``res`` image before the crop origin is added.
	    ``crop_info`` is read as ``(h_begin, w_begin, res)``.
	    """
	    homogeneous = np.hstack((points, np.ones((points.shape[0], 1))))
	    projected = (projection_matrix.T @ homogeneous.T).T
	    # Small epsilon guards against division by a zero last component.
	    inv_w = 1. / (projected[:, -1:] + 1e-8)
	    projected = (projected * inv_w)[:, :-1]

	    h_begin, w_begin, res = crop_info[0], crop_info[1], crop_info[2]
	    projected[:, :2] = ((projected[:, :2] + 1) * res - 1) / 2
	    projected[:, 0] += w_begin
	    projected[:, 1] += h_begin
	    return projected


	def gen_tracking_video(base_dir):
	    """Turn generated driving-point motion into 2D tracks and visualise them.

	    Reads ``gen_data.npy``, ``projection.npy``, ``crop_info.npy``,
	    ``center.npy``, ``scale.npy``, ``point_cloud.ply`` and ``idx.npy`` from
	    ``base_dir``, propagates the motion of the driving points to every
	    Gaussian, projects the result into the image, fills the bundled track
	    template and writes ``tracks_gen/tracks.npy`` before calling
	    ``visualize_tracks`` on that folder. Requires a CUDA device.

	    Args:
	        base_dir: Directory holding the input files; outputs go to
	            ``{base_dir}/tracks_gen``.
	    """
	    n_frames = 49

	    animated_points = np.load(f'{base_dir}/gen_data.npy')
	    animated_points = animated_points * 2
	    animated_points = _upsample_trajectory(animated_points, n_frames)

	    projection_matrix = np.load(f'{base_dir}/projection.npy')
	    crop_info = np.load(f'{base_dir}/crop_info.npy')
	    center = np.load(f'{base_dir}/center.npy')
	    scale = np.load(f'{base_dir}/scale.npy')
	    # Undo the normalisation stored by the preprocessing step.
	    animated_points = (animated_points / scale) + center

	    ## Aligned to Gaussian points at this moment
	    print(animated_points.mean(), animated_points.std(), animated_points.max(), animated_points.min())
	    device = torch.device("cuda")
	    # Parse the 'big' preset explicitly instead of overwriting the
	    # process-wide sys.argv, which would leak into every later CLI parse.
	    opt = tyro.cli(AllConfigs, args=['big'])

	    # Enlarge the canvas while keeping the focal length, so the field of view
	    # has to grow accordingly: fovy' = 2 * atan((scale_factor * size / 2) / focal).
	    scale_factor = 2
	    focal = 0.5 * opt.output_size / np.tan(np.deg2rad(opt.fovy) / 2)
	    new_fovy_rad = 2 * np.arctan(0.5 * scale_factor * opt.output_size / focal)
	    opt.fovy = np.rad2deg(new_fovy_rad)
	    opt.output_size *= scale_factor # Expand canvas size by 2

	    gs = GaussianRenderer(opt)
	    gaussians = gs.load_ply(f'{base_dir}/point_cloud.ply', compatible=True).to(device).float()
	    idx = torch.from_numpy(np.load(f'{base_dir}/idx.npy')).to(device)
	    gaussian_pos = gaussians[:, :3].contiguous()
	    # Columns 7:11 are handed to interpolate_points as the per-Gaussian
	    # rotation (presumably quaternions -- confirm against LGM's layout).
	    gaussian_rot = gaussians[:, 7:11]
	    drive_x = gaussian_pos[idx]
	    # Negated distances so that topk returns the 8 nearest driving points.
	    neg_dist = -1.0 * torch.cdist(gaussian_pos, drive_x) # [N, 2048]
	    _, topk_index = torch.topk(neg_dist, 8, -1)

	    # Per-frame positions of every Gaussian. The updated rotations are not
	    # needed because only positions are projected below.
	    pos = []
	    for i in tqdm(range(0, n_frames, 1)):
	        drive_current = torch.from_numpy(animated_points[i]).to(device).float()
	        ret_points, _ = interpolate_points(gaussian_pos, gaussian_rot, drive_x, drive_current, topk_index)
	        pos.append(ret_points.cpu().numpy())

	    template_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'templates', 'tracks_template.npy')
	    # NOTE(review): allow_pickle=True executes pickled content; acceptable only
	    # because the template is a file shipped with the project.
	    track_template = np.load(template_path, allow_pickle=True)
	    tracks = track_template.item()['tracks']
	    tracks_output = tracks.copy()
	    tracks_init = tracks[0, 0]
	    track_idx = []
	    mask = np.zeros(tracks_init.shape[0], dtype=bool)

	    for i in tqdm(range(n_frames)):
	        projected_points = _project_to_pixels(pos[i], projection_matrix, crop_info)

	        if i == 0:
	            # Bind each template track to its nearest projected point once,
	            # on the first frame; a point is consumed when it is assigned.
	            track_point_candidates = track_first(projected_points, (480, 720))
	            for j in range(tracks_init.shape[0]):
	                target = np.array([tracks_init[j, 0], tracks_init[j, 1]])
	                candidate, track_point_candidates = find_and_remove_nearest_point(target, track_point_candidates)
	                if candidate is not None:
	                    track_idx.append(candidate[3].astype(np.int32))
	                    mask[j] = True

	        tracks_output[0, i, mask] = projected_points[track_idx]
	        # Unmatched tracks stay at their template position with the third
	        # channel set to 2 (meaning defined by the track consumer -- TODO confirm).
	        tracks_output[0, i, ~mask, :2] = tracks_output[0, 0, ~mask, :2]
	        tracks_output[0, i, ~mask, 2] = 2

	    track_template.item()['tracks'] = tracks_output
	    sub_name = 'tracks_gen'
	    sub_dir = f'{base_dir}/{sub_name}'
	    os.makedirs(sub_dir, exist_ok=True)

	    np.save(f'{sub_dir}/tracks.npy', track_template)
	    args = Namespace(tracks_dir=sub_dir, output_dir=sub_dir, output_fps=24, point_size=10, len_track=0, num_frames=49, video_path=None)
	    visualize_tracks(tracks_dir=sub_dir, output_dir=sub_dir, args=args)

	def load_das(gpu_id, output_dir):
	    """Create a ``DiffusionAsShaderPipeline``.

	    Args:
	        gpu_id: Forwarded as the pipeline's ``gpu_id`` keyword argument.
	        output_dir: Forwarded as the pipeline's ``output_dir`` keyword argument.

	    Returns:
	        The newly constructed pipeline instance.
	    """
	    return DiffusionAsShaderPipeline(gpu_id=gpu_id, output_dir=output_dir)

	def normalize_points(output_dir, fluid=False):
	    """Normalise the saved point cloud and pick 2048 driving points from it.

	    Loads ``{output_dir}/point_cloud.ply``, normalises it with
	    ``transform2origin`` / ``shift2center``, runs farthest point sampling on
	    the GPU, and writes ``center.npy``, ``scale.npy`` and ``idx.npy`` into
	    ``output_dir``.

	    Args:
	        output_dir: Directory containing ``point_cloud.ply``; also receives
	            the three ``.npy`` outputs.
	        fluid: Currently unused; kept so existing callers keep working.

	    Returns:
	        Tuple ``(points, center, scale)``: the sampled points as a NumPy
	        array, plus the centre and scale returned by ``transform2origin``.
	    """
	    from .transform import transform2origin, shift2center
	    import trimesh
	    from torch_cluster import fps

	    device = 'cuda'
	    N = 2048
	    grid_center = [5, 5, 5]

	    pc = trimesh.load_mesh(f'{output_dir}/point_cloud.ply')
	    points = np.array(pc.vertices)
	    points, center, scale = transform2origin(points, size=1)
	    points = shift2center(points, center=grid_center)
	    points = torch.tensor(points, dtype=torch.float32, device=device).contiguous()
	    np.save(f'{output_dir}/center.npy', center)
	    np.save(f'{output_dir}/scale.npy', scale)

	    ratio_N = N / points.shape[0]
	    idx = fps(points, ratio=ratio_N, random_start=True)
	    # fps derives its sample count from ratio * num_points, so floating-point
	    # round-off can yield one index too many; downstream code expects exactly N.
	    idx = idx[:N]
	    points = points[idx].cpu().numpy()
	    np.save(f'{output_dir}/idx.npy', idx.cpu().numpy())
	    return points, center, scale