Spaces:

singlecell
/

jiggle-physics

Running on Zero

jiggle-physics / reconstruction.py

Justin Wood

Initial backend

c401d3e 22 days ago

4.24 kB

	import numpy as np
	import torch
	import trimesh
	import io
	from PIL import Image


	# Module-level cache for the (model, processor) tuple built by get_triposr().
	# Stays None until get_triposr() has loaded the model once.
	_triposr_cache = None


	def get_triposr():
	    """
	    Return the cached ``(model, processor)`` tuple, loading it on first use.

	    The first call imports the TripoSR classes from ``transformers``, loads
	    both from the "stabilityai/TripoSR" checkpoint, switches the model to
	    eval mode and stores the pair in the module-level ``_triposr_cache``.
	    Later calls return the stored tuple unchanged.
	    """
	    global _triposr_cache
	    if _triposr_cache is not None:
	        return _triposr_cache

	    # Imported here rather than at module import time, so the dependency is
	    # only touched when a reconstruction is actually requested.
	    from transformers import TripoSRForImageTo3D, TripoSRImageProcessor

	    checkpoint = "stabilityai/TripoSR"
	    processor = TripoSRImageProcessor.from_pretrained(checkpoint)
	    model = TripoSRForImageTo3D.from_pretrained(checkpoint)
	    model.eval()

	    _triposr_cache = (model, processor)
	    return _triposr_cache


	def reconstruct_region(image: Image.Image, mask: list[list[bool]], bbox: list[int]) -> bytes:
	    """
	    Run TripoSR on the masked region of ``image`` and return the mesh as GLB bytes.

	    ``bbox`` is unpacked as ``(x, y, w, h)``. The box is grown by 20% of its
	    width and height on each side, clamped to the image bounds, and the crop
	    is resized to 512x512. ``mask`` is sliced with the same pixel window
	    (rows first, then columns), resized to 512x512 with nearest-neighbour
	    sampling, and written into the crop's alpha channel before the crop is
	    handed to the processor and model.
	    """
	    model, processor = get_triposr()
	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    model = model.to(device)

	    target_size = (512, 512)

	    # Padded crop window (20% of the box size per side), clamped to the image.
	    left, top, box_w, box_h = bbox
	    margin_x = int(box_w * 0.20)
	    margin_y = int(box_h * 0.20)
	    img_w, img_h = image.size
	    x0 = max(0, left - margin_x)
	    y0 = max(0, top - margin_y)
	    x1 = min(img_w, left + box_w + margin_x)
	    y1 = min(img_h, top + box_h + margin_y)
	    window = (x0, y0, x1, y1)
	    cropped = image.crop(window).resize(target_size, Image.LANCZOS)

	    # The mask becomes the alpha channel so TripoSR focuses on the region.
	    # NEAREST keeps the resized mask strictly 0/255.
	    mask_window = np.array(mask, dtype=np.uint8)[y0:y1, x0:x1]
	    alpha_img = Image.fromarray(mask_window * 255).resize(target_size, Image.NEAREST)
	    alpha = np.array(alpha_img)

	    rgba = np.array(cropped.convert("RGBA"))
	    rgba[..., 3] = alpha
	    model_input = Image.fromarray(rgba)

	    inputs = processor(images=model_input, return_tensors="pt").to(device)
	    with torch.no_grad():
	        outputs = model(**inputs)

	    # Preferred path: the returned mesh object can export itself.
	    mesh_data = outputs.mesh
	    if hasattr(mesh_data, "export"):
	        return mesh_data.export(file_type="glb")

	    # Fallback: pull vertex/face tensors out of the result and let trimesh
	    # do the GLB export. process=False keeps trimesh from merging or
	    # reordering vertices.
	    vertices = mesh_data.verts_list()[0].cpu().numpy()
	    triangles = mesh_data.faces_list()[0].cpu().numpy()
	    fallback_mesh = trimesh.Trimesh(vertices=vertices, faces=triangles, process=False)
	    sink = io.BytesIO()
	    fallback_mesh.export(sink, file_type="glb")
	    return sink.getvalue()


	def _export_glb(mesh) -> bytes:
	    """Serialize a trimesh mesh to GLB and return the raw bytes."""
	    buf = io.BytesIO()
	    mesh.export(buf, file_type="glb")
	    return buf.getvalue()


	def _flat_quad_glb() -> bytes:
	    """Return a unit quad in the z=0 plane as GLB, used when no surface can be built."""
	    verts = np.array([[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0]], dtype=np.float32)
	    faces = np.array([[0, 1, 2], [0, 2, 3]])
	    return _export_glb(trimesh.Trimesh(vertices=verts, faces=faces))


	def depth_to_mesh(depth: list[list[float]], mask: list[list[bool]], image: Image.Image) -> bytes:
	    """
	    Fallback when TripoSR isn't available: lift depth map into a 3D mesh
	    constrained to the masked region, textured with the source image.

	    Args:
	        depth: 2D grid of depth values, indexed ``depth[row][col]``. It is
	            min/max-normalized over the whole grid and scaled to a Z range
	            of 0.5 units.
	        mask: 2D grid of flags; every truthy pixel becomes one vertex.
	            Assumed to have the same shape as ``depth`` (mask coordinates
	            are used to index the depth grid) -- TODO confirm with callers.
	        image: Source image used as the texture. UVs are the pixel position
	            divided by the depth grid's width/height, with V flipped.

	    Returns:
	        GLB-encoded mesh bytes. A flat unit quad is returned when the mask
	        cannot produce a surface: it is empty, has fewer than three pixels,
	        or all its pixels are collinear.
	    """
	    depth_arr = np.array(depth, dtype=np.float32)
	    mask_arr = np.array(mask, dtype=bool)
	    H, W = depth_arr.shape

	    # Normalize depth to [0, 1] then scale to reasonable Z range
	    dmin, dmax = depth_arr.min(), depth_arr.max()
	    if dmax > dmin:
	        depth_norm = (depth_arr - dmin) / (dmax - dmin)
	    else:
	        # Constant depth map: avoid dividing by zero, produce a flat surface.
	        depth_norm = np.zeros_like(depth_arr)
	    depth_scaled = depth_norm * 0.5  # 0.5 units of Z range

	    # One vertex per masked pixel. A triangulation needs at least three
	    # points, so smaller masks (including the empty one) get the flat quad.
	    ys, xs = np.where(mask_arr)
	    if xs.size < 3:
	        return _flat_quad_glb()

	    # Normalize to [-0.5, 0.5] XY space (Y flipped so image "up" is +Y)
	    x_norm = (xs / W) - 0.5
	    y_norm = 0.5 - (ys / H)
	    z_vals = depth_scaled[ys, xs]
	    vertices = np.stack([x_norm, y_norm, z_vals], axis=1).astype(np.float32)

	    # UV = source pixel position
	    uvs = np.stack([xs / W, 1.0 - ys / H], axis=1).astype(np.float32)

	    # Triangulate the masked pixels using Delaunay
	    from scipy.spatial import Delaunay, QhullError
	    points_2d = np.stack([x_norm, y_norm], axis=1)
	    try:
	        tri = Delaunay(points_2d)
	    except QhullError:
	        # Degenerate input, e.g. every masked pixel lies on one line.
	        return _flat_quad_glb()
	    faces = tri.simplices.astype(np.int32)

	    # Delaunay covers the convex hull of the points, so concave masks and
	    # holes get bridged by triangles lying outside the mask. Drop every
	    # triangle whose centroid pixel is not masked. Centroids of integer
	    # pixel triples are multiples of 1/3, so rounding is never ambiguous.
	    centroid_x = np.rint(xs[faces].mean(axis=1)).astype(np.intp)
	    centroid_y = np.rint(ys[faces].mean(axis=1)).astype(np.intp)
	    inside = mask_arr[centroid_y, centroid_x]
	    if inside.any():
	        # Only filter when something survives; for very sparse masks keep
	        # the unfiltered triangulation rather than export a face-less mesh.
	        faces = faces[inside]

	    # Build mesh with texture
	    texture = trimesh.visual.texture.TextureVisuals(
	        uv=uvs,
	        image=image.convert("RGB"),
	    )
	    mesh = trimesh.Trimesh(vertices=vertices, faces=faces, visual=texture, process=False)
	    return _export_glb(mesh)