Spaces:

bartduis
/

rayst3r

Running on Zero

App Files Files Community

rayst3r / app.py

bartduis

Update app.py

d2e8abe verified 7 months ago

raw

history blame

8.03 kB

	from spaces import GPU
	import torch

	def dummy_warmup():
	    """Run a trivial CUDA operation as a warm-up when a GPU is available.

	    Prints a status line in either case; without CUDA nothing else happens.
	    """
	    if not torch.cuda.is_available():
	        print("[WARNING] CUDA not available. Skipping warmup.")
	        return

	    print("[INFO] CUDA is available. Running warmup.")
	    # Moving a one-element tensor to the device is the whole warm-up; the
	    # resulting tensor is not needed afterwards.
	    torch.tensor([1.0]).cuda()

	import os, shutil
	import numpy as np
	import gradio as gr
	import rembg
	import trimesh
	from moge.model.v1 import MoGeModel
	from utils.geometry import compute_pointmap
	import cv2
	from huggingface_hub import hf_hub_download
	from PIL import Image
	import matplotlib.pyplot as plt
	from eval_wrapper.eval import EvalWrapper, eval_scene


	# Use the GPU when torch can see one, otherwise fall back to the CPU.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'

	# Scratch directory: holds the per-request "input" folder and the exported .glb files.
	outdir = "/tmp/rayst3r"
	print("Loading MoGe model")
	# MoGe supplies the "depth" and "intrinsics" entries read by the functions below.
	moge_model = MoGeModel.from_pretrained("Ruicheng/moge-vitl").to(device)
	# DINOv2 model `dinov2_vitl14_reg`, fetched through torch.hub; it is handed to eval_scene.
	dino_model = torch.hub.load('facebookresearch/dinov2', "dinov2_vitl14_reg")
	dino_model.eval()
	dino_model.to(device)

	print("Loading RaySt3R model")
	# Download the checkpoint file, build the wrapper on the CPU, then move it to `device`.
	rayst3r_checkpoint = hf_hub_download("bartduis/rayst3r", "rayst3r.pth")
	rayst3r_model = EvalWrapper(rayst3r_checkpoint,device='cpu')
	rayst3r_model = rayst3r_model.to(device)
	print("Loaded rayst3r_model")




	# Load the model from huggingface hub (or load from local).


	def depth2uint16(depth):
	return depth * torch.iinfo(torch.uint16).max / 10.0 # threshold is in m, convert to uint16 value
	#
	def save_tensor_as_png(tensor: torch.Tensor, path: str, dtype: torch.dtype | None = None):
	    """Write a tensor to `path` as an image using PIL.

	    Args:
	        tensor: Tensor to save. It is cast to `dtype`, moved to the CPU and
	            converted to a NumPy array before being handed to PIL.
	        path: Destination file path passed to `Image.save`.
	        dtype: Dtype to cast to before saving. Defaults to the tensor's own
	            dtype, i.e. no cast.
	    """
	    if dtype is None:
	        dtype = tensor.dtype
	    # detach() so that a tensor which still tracks gradients can be converted
	    # to NumPy; for tensors without autograd history it is a no-op.
	    array = tensor.detach().to(dtype).cpu().numpy()
	    Image.fromarray(array).save(path)

	def colorize_points_with_turbo_all_dims(points, method='norm', cmap='turbo'):
	    """Colour 3D points with a matplotlib colormap driven by one scalar per point.

	    Args:
	        points (np.ndarray): (N, 3) array of 3D points.
	        method (str): How each point is reduced to a scalar. 'norm' uses the
	            Euclidean norm, 'pca' the projection onto the first principal axis.
	        cmap (str): Name of the matplotlib colormap to apply.

	    Returns:
	        np.ndarray: (N, 3) RGB colors in [0, 1]. An empty input yields an
	        empty (0, 3) array.

	    Raises:
	        ValueError: If `points` is not of shape (N, 3) or `method` is unknown.
	    """
	    points = np.asarray(points)
	    # Explicit raises instead of `assert`, which is stripped under `python -O`.
	    if points.ndim != 2 or points.shape[1] != 3:
	        raise ValueError("Input must be of shape (N, 3)")
	    if method not in ('norm', 'pca'):
	        raise ValueError(f"Unknown method '{method}'")

	    # Nothing to colour; min()/max() below would fail on a zero-size array.
	    if points.shape[0] == 0:
	        return np.empty((0, 3), dtype=float)

	    if method == 'norm':
	        scalar = np.linalg.norm(points, axis=1)
	    else:
	        # Project onto first principal component
	        centered = points - points.mean(axis=0)
	        _, _, vh = np.linalg.svd(centered, full_matrices=False)
	        scalar = centered @ vh[0]

	    # Normalize scalar to [0, 1]; the epsilon avoids 0/0 when all values are equal.
	    scalar_min, scalar_max = scalar.min(), scalar.max()
	    normalized = (scalar - scalar_min) / (scalar_max - scalar_min + 1e-8)

	    colormap = plt.colormaps.get_cmap(cmap)
	    return colormap(normalized)[:, :3]  # Drop alpha

	def prep_for_rayst3r(img,depth_dict,mask):
	    """Write the files read by `eval_scene` into `<outdir>/input`.

	    The folder is recreated from scratch and then receives `intrinsics.pt`,
	    `depth.png` (uint16), `mask.png` (bool) and `rgb.png`.

	    Args:
	        img: NumPy image array; its first two dimensions are taken as (H, W).
	        depth_dict: Mapping with "intrinsics" and "depth" tensors.
	            NOTE(review): the first two intrinsics rows are scaled by W and H,
	            so they are presumably normalised by image size - confirm.
	        mask: NumPy array saved as a boolean mask.
	    """
	    H, W = img.shape[:2]
	    # clone() so the in-place scaling below never writes into depth_dict: for a
	    # tensor that already lives on the CPU, detach().cpu() shares its storage.
	    intrinsics = depth_dict["intrinsics"].detach().cpu().clone()
	    intrinsics[0] *= W
	    intrinsics[1] *= H

	    # Always start from an empty input directory.
	    input_dir = os.path.join(outdir, "input")
	    if os.path.exists(input_dir):
	        shutil.rmtree(input_dir)
	    os.makedirs(input_dir, exist_ok=True)

	    # save intrinsics
	    torch.save(intrinsics, os.path.join(input_dir, "intrinsics.pt"))

	    # save depth, rescaled so that 10 m corresponds to the uint16 maximum
	    depth = depth2uint16(depth_dict["depth"].cpu())
	    save_tensor_as_png(depth, os.path.join(input_dir, "depth.png"), dtype=torch.uint16)

	    # save mask as bool
	    save_tensor_as_png(torch.from_numpy(mask).bool(), os.path.join(input_dir, "mask.png"), dtype=torch.bool)
	    # save image
	    save_tensor_as_png(torch.from_numpy(img), os.path.join(input_dir, "rgb.png"))

	@GPU(duration = 120)
	def rayst3r_to_glb(img,depth_dict,mask,max_total_points=10e6,rotated=False):
	    """Run RaySt3R on the prepared inputs and export the result as a GLB file.

	    Args:
	        img: Image array forwarded to `prep_for_rayst3r`.
	        depth_dict: Mapping with "intrinsics" and "depth" tensors.
	        mask: Foreground mask forwarded to `prep_for_rayst3r`.
	        max_total_points: Upper bound on the number of exported points; larger
	            predictions are randomly subsampled. May be a float such as 10e6.
	        rotated: Currently unused.

	    Returns:
	        str: Path of the written `<outdir>/rayst3r.glb`.
	    """
	    prep_for_rayst3r(img,depth_dict,mask)

	    input_dir = os.path.join(outdir, "input")
	    rayst3r_points = eval_scene(
	        rayst3r_model,
	        input_dir,
	        do_filter_all_masks=True,
	        dino_model=dino_model,
	        device=device,
	        set_conf=10,
	    ).cpu()

	    # Random subsample. The default cap is the float 10e6 and a float is not a
	    # valid slice bound, so the count is converted to int first.
	    n_points = int(min(max_total_points, rayst3r_points.shape[0]))
	    keep = torch.randperm(rayst3r_points.shape[0])[:n_points]
	    rayst3r_points = rayst3r_points[keep].numpy()

	    # Negate Y and Z, the same flip input_to_glb applies to its points.
	    rayst3r_points[:,1] = -rayst3r_points[:,1]
	    rayst3r_points[:,2] = -rayst3r_points[:,2]

	    # Colour every point by its distance from the origin (colormap default).
	    colors = colorize_points_with_turbo_all_dims(rayst3r_points)

	    # Wrap the point cloud in a scene so it can be exported as GLB.
	    scene = trimesh.Scene()
	    pct = trimesh.PointCloud(rayst3r_points, colors=colors, radius=0.01)
	    scene.add_geometry(pct)

	    outfile = os.path.join(outdir, "rayst3r.glb")
	    scene.export(outfile)
	    return outfile


	def input_to_glb(outdir,img,depth_dict,mask,rotated=False):
	    """Export the masked, coloured input point cloud to `<outdir>/input.glb`.

	    Args:
	        outdir: Directory the GLB file is written to.
	        img: Image array; its first two dimensions are taken as (H, W) and its
	            masked pixels become the point colours.
	        depth_dict: Mapping with "intrinsics" and "depth" tensors.
	        mask: Boolean array selecting, row by row, which pixels are kept.
	        rotated: Currently unused.

	    Returns:
	        str: Path of the written GLB file.
	    """
	    H, W = img.shape[:2]
	    # copy(): on the CPU, .cpu().numpy() shares memory with the tensor stored in
	    # depth_dict, so scaling in place would corrupt it for later consumers
	    # (prep_for_rayst3r scales the same intrinsics again).
	    intrinsics = depth_dict["intrinsics"].detach().cpu().numpy().copy()
	    intrinsics[0] *= W
	    intrinsics[1] *= H

	    depth = depth_dict["depth"].cpu().numpy()
	    cam2world = np.eye(4)  # identity camera pose
	    points_world = compute_pointmap(depth, cam2world, intrinsics)

	    # Keep only the masked pixels, one image row at a time.
	    pts = np.concatenate([p[m] for p,m in zip(points_world,mask)])
	    col = np.concatenate([c[m] for c,m in zip(img,mask)])

	    # Negate Y and Z before export.
	    pts = pts.reshape(-1,3)
	    pts[:,1] = -pts[:,1]
	    pts[:,2] = -pts[:,2]

	    scene = trimesh.Scene()
	    pct = trimesh.PointCloud(pts, colors=col.reshape(-1,3))
	    scene.add_geometry(pct)

	    outfile = os.path.join(outdir, "input.glb")
	    scene.export(outfile)
	    return outfile

	@GPU(duration = 10)
	def depth_moge(input_img):
	    """Run MoGe on an image array and return the model's output unchanged.

	    The image is divided by 255, converted to a float32 tensor on `device`,
	    and its last axis is moved to the front (channel-last to channel-first,
	    assuming an H x W x C image) before inference.
	    """
	    scaled = input_img / 255
	    chw = torch.tensor(scaled, dtype=torch.float32, device=device).permute(2, 0, 1)
	    return moge_model.infer(chw)

	@GPU(duration = 10)
	def mask_rembg(input_img):
	    """Segment the foreground with rembg and return `(mask, rgb)`.

	    `mask` is True where the eroded alpha channel is at least 128; `rgb` holds
	    the first three channels of rembg's output.
	    """
	    cutout = np.array(
	        rembg.remove(input_img, alpha_matting=False, post_process_mask=True)
	    )

	    # Shrink the object slightly: erode the alpha channel once with a 3x3
	    # kernel (a larger kernel would be more aggressive) and store it back.
	    structuring_element = np.ones((3, 3), np.uint8)
	    cutout[..., 3] = cv2.erode(cutout[..., 3], structuring_element, iterations=1)

	    foreground = cutout[:, :, -1] >= 128
	    return foreground, cutout[:, :, :3]

	@GPU(duration = 10)
	def process_image(input_img):
	    """Turn one input image into the two GLB files shown in the UI.

	    Args:
	        input_img: Either an image array or a path to an image file. A path is
	            decoded to an RGB array first.

	    Returns:
	        tuple[str, str]: Paths of the input point-cloud GLB and the RaySt3R
	        prediction GLB, both written under `outdir`.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    if input_img is None:
	        raise gr.Error("Please upload an image before clicking Generate.")

	    # gr.Image(type="filepath") hands over a path string, while the code below
	    # needs pixel data (.shape, cv2.rotate, cv2.resize), so decode it here.
	    if isinstance(input_img, (str, os.PathLike)):
	        with Image.open(input_img) as pil_img:
	            input_img = np.array(pil_img.convert("RGB"))

	    # Portrait images are turned to landscape before resizing to 640x480.
	    rotated = False
	    if input_img.shape[0] > input_img.shape[1]:
	        input_img = cv2.rotate(input_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
	        rotated = True

	    input_img = cv2.resize(input_img, (640, 480))
	    mask, _ = mask_rembg(input_img)
	    depth_dict = depth_moge(input_img)

	    # Start every request from an empty output directory.
	    if os.path.exists(outdir):
	        shutil.rmtree(outdir)
	    os.makedirs(outdir)

	    input_glb = input_to_glb(outdir,input_img,depth_dict,mask,rotated=rotated)
	    inference_glb = rayst3r_to_glb(input_img,depth_dict,mask,rotated=rotated)
	    return input_glb, inference_glb


	# Gradio UI: one row with the input image and two 3D viewers, a Generate
	# button, and a cached example.
	# NOTE(review): per the Gradio docs, delete_cache=(frequency, age) is given in
	# seconds - confirm 600/600 is the intended cleanup policy.
	with gr.Blocks(delete_cache=(600, 600)) as demo:
	gr.Markdown("""
	## Image to 3D in cluttered scene with [RaySt3R](https://rayst3r.github.io/)
	* Upload an image and click generate.
	* We mask the input using [Rembg](https://github.com/danielgatis/rembg) and generate depth with [MoGe](https://github.com/microsoft/MoGe).
	* The input point cloud will be visible in 'input', the output is visible in the "RaySt3R" section.

	[Project Website](https://rayst3r.github.io/)
	""")

	# NOTE(review): type="filepath" passes process_image a path string rather than
	# a pixel array - confirm process_image handles that.
	with gr.Row():
	input_image = gr.Image(type="filepath")
	input_model = gr.Model3D(label="Input")
	output_model = gr.Model3D(label="RaySt3R")

	generate_btn = gr.Button("Generate")

	# Clicking the button runs process_image on the uploaded image and shows its
	# two returned GLB paths in the "Input" and "RaySt3R" viewers.
	generate_btn.click(
	fn=process_image,
	inputs=[input_image],
	outputs=[input_model, output_model],
	)

	# Bundled example wired to the same function and components as the button.
	# cache_examples=True asks Gradio to cache the example's outputs.
	gr.Examples(
	examples=[
	["example_scene/ycb.png"], # path to your example image
	],
	inputs=[input_image],
	outputs=[input_model, output_model],
	fn=process_image,
	cache_examples=True,
	)

	# Script entry point: run the CUDA warm-up (a no-op message without a GPU),
	# then start the Gradio app.
	if __name__ == "__main__":
	dummy_warmup()
	demo.launch()