Spaces:

rayli
/

instruct-particulate

Running on Zero

App Files Files Community

instruct-particulate / scripts /render_auto_kinematics_blender.py

rayli

Fix auto-kinematics Blender PIL dependency

06713f0 verified 23 days ago

Raw

History Blame Contribute Delete

19.4 kB

	#!/usr/bin/env python3
	"""Render auto-kinematics mesh views inside headless Blender."""

	from __future__ import annotations

	import argparse
	import math
	import sys
	from pathlib import Path

	import numpy as np

	# Blender 3.4's bundled glTF importer still references NumPy aliases removed in
	# newer NumPy builds. Install them before bpy imports or glTF loading can run.
	# Only aliases that are absent from the module namespace are installed, so a
	# NumPy build that defines one of these names itself (NumPy 2.x exposes
	# ``np.bool`` as its own boolean scalar type) is not overwritten with the
	# Python builtin.
	for _alias_name, _builtin_type in (
	    ("bool", bool),
	    ("int", int),
	    ("float", float),
	    ("complex", complex),
	    ("object", object),
	):
	    if _alias_name not in vars(np):
	        setattr(np, _alias_name, _builtin_type)
	# Do not leave the loop variables behind as module attributes.
	del _alias_name, _builtin_type

	import bpy
	from mathutils import Matrix, Vector

	# Maps a lower-case mesh file extension (without the leading dot) to the
	# Blender import operator that ``load_object`` calls for that format.
	# NOTE(review): the ``bpy.ops.wm.*_import`` operators are presumably only
	# available in newer Blender builds -- confirm against the Blender version
	# this script is run with.
	IMPORT_FUNCTIONS = {
	    "obj": bpy.ops.wm.obj_import,
	    "glb": bpy.ops.import_scene.gltf,
	    "gltf": bpy.ops.import_scene.gltf,
	    "fbx": bpy.ops.import_scene.fbx,
	    "stl": bpy.ops.wm.stl_import,
	    "dae": bpy.ops.wm.collada_import,
	    "ply": bpy.ops.wm.ply_import,
	}


	# 3x3 matrices, written as three rows, keyed by a signed source up-axis.
	# Multiplying a key's matrix by that axis' unit vector yields (0, 0, 1), i.e.
	# each entry turns the named source axis into +Z ("+Z" is the identity).
	# ``rotation_matrix_for_up`` converts the selected entry into a 4x4 Matrix.
	UP_DIR_ROTATIONS = {
	    "+X": ((0.0, 0.0, -1.0), (0.0, 1.0, 0.0), (1.0, 0.0, 0.0)),
	    "-X": ((0.0, 0.0, 1.0), (0.0, 1.0, 0.0), (-1.0, 0.0, 0.0)),
	    "+Y": ((1.0, 0.0, 0.0), (0.0, 0.0, -1.0), (0.0, 1.0, 0.0)),
	    "-Y": ((1.0, 0.0, 0.0), (0.0, 0.0, 1.0), (0.0, -1.0, 0.0)),
	    "+Z": ((1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0)),
	    "-Z": ((1.0, 0.0, 0.0), (0.0, -1.0, 0.0), (0.0, 0.0, -1.0)),
	}


	def canonicalize_up_dir(up_dir: str) -> str:
	    """Normalize an up-axis label to a signed key of ``UP_DIR_ROTATIONS``.

	    The label is stringified, stripped and upper-cased; a bare ``X``, ``Y`` or
	    ``Z`` is treated as the positive axis.

	    Raises:
	        ValueError: if the normalized label is not a key of
	            ``UP_DIR_ROTATIONS``.
	    """
	    cleaned = str(up_dir).strip().upper()
	    signed = f"+{cleaned}" if cleaned in {"X", "Y", "Z"} else cleaned
	    if signed in UP_DIR_ROTATIONS:
	        return signed
	    raise ValueError(f"Invalid up direction: {up_dir}")


	def orient_saved_raster_payload(buffer: np.ndarray, *, flip: bool) -> np.ndarray:
	    """Return a copy of a per-pixel payload in the saved image's orientation.

	    When ``flip`` is true the payload is reversed along axes 0 and 1 (a
	    180-degree turn of the image plane); otherwise it is only copied. The
	    result never aliases ``buffer``.
	    """
	    payload = np.asarray(buffer)
	    if not flip:
	        return payload.copy()
	    return np.flip(payload, axis=(0, 1)).copy()


	def parse_args() -> argparse.Namespace:
	    """Parse the script options that follow the first bare ``--`` in ``sys.argv``.

	    Everything up to and including that ``--`` is ignored. When there is no
	    ``--`` the parser receives an empty argument list.
	    """
	    try:
	        separator = sys.argv.index("--")
	    except ValueError:
	        script_args = []
	    else:
	        script_args = sys.argv[separator + 1 :]

	    # (flag, add_argument keyword options), registered in this order.
	    option_specs = (
	        ("--mesh-path", {"type": Path, "required": True}),
	        ("--output-dir", {"type": Path, "required": True}),
	        ("--resolution", {"type": int, "required": True}),
	        ("--camera-distance", {"type": float, "required": True}),
	        ("--pitch-deg", {"type": float, "required": True}),
	        ("--engine", {"type": str, "default": "CYCLES"}),
	        ("--samples", {"type": int, "default": 8}),
	        ("--azimuths", {"type": float, "nargs": "+", "required": True}),
	        ("--up-dir", {"type": str, "default": ""}),
	    )
	    parser = argparse.ArgumentParser()
	    for flag, options in option_specs:
	        parser.add_argument(flag, **options)
	    return parser.parse_args(script_args)


	def load_object(mesh_path: Path) -> None:
	    """Import ``mesh_path`` into the current Blender scene.

	    The importer is chosen from ``IMPORT_FUNCTIONS`` by file extension. After a
	    successful import the view layer is updated and backface culling is
	    switched on for every material in ``bpy.data.materials``.

	    Raises:
	        ValueError: if the extension has no entry in ``IMPORT_FUNCTIONS``.
	        RuntimeError: if the importer's result does not contain ``"FINISHED"``.
	    """
	    extension = mesh_path.suffix.lower().lstrip(".")
	    importer = IMPORT_FUNCTIONS.get(extension)
	    if importer is None:
	        raise ValueError(f"Unsupported mesh type for Blender auto-kinematics render: {mesh_path.suffix}")

	    filepath = str(mesh_path)
	    if extension == "obj":
	        status = importer(filepath=filepath, forward_axis="Y", up_axis="Z")
	    elif extension in {"glb", "gltf"}:
	        try:
	            status = importer(filepath=filepath, merge_vertices=False)
	        except TypeError:
	            # Retry without the keyword when the importer rejects it.
	            status = importer(filepath=filepath)
	    else:
	        status = importer(filepath=filepath)

	    if "FINISHED" not in status:
	        raise RuntimeError(f"Blender failed to import mesh: {mesh_path}")
	    bpy.context.view_layer.update()
	    for material in bpy.data.materials:
	        material.use_backface_culling = True


	def matrix_to_numpy(matrix: object) -> np.ndarray:
	    """Convert an iterable of row iterables into a new ``float32`` array."""
	    row_lists = list(map(list, matrix))
	    return np.array(row_lists, dtype=np.float32)


	def reset_scene() -> bpy.types.Object:
	    """Load empty factory settings and add the scene camera.

	    The new camera object is renamed to ``"Camera"`` (``normalize_scene`` looks
	    it up by that name), made the scene's active camera and returned.
	    """
	    bpy.ops.wm.read_factory_settings(use_empty=True)
	    bpy.ops.object.camera_add()
	    context = bpy.context
	    new_camera = context.active_object
	    new_camera.name = "Camera"
	    context.scene.camera = new_camera
	    return new_camera


	def init_render(*, engine: str, resolution: int, samples: int) -> None:
	    """Configure output, colour management and (for Cycles) sampling settings.

	    Args:
	        engine: value assigned to ``scene.render.engine``; the Cycles-specific
	            settings are applied only when it equals ``"CYCLES"``.
	        resolution: edge length in pixels, used for both width and height.
	        samples: Cycles sample count.
	    """
	    scene = bpy.context.scene

	    # Square, transparent RGBA PNG output without post-processing.
	    render = scene.render
	    render.engine = engine
	    edge = int(resolution)
	    render.resolution_x = edge
	    render.resolution_y = edge
	    render.resolution_percentage = 100
	    image_settings = render.image_settings
	    image_settings.file_format = "PNG"
	    image_settings.color_mode = "RGBA"
	    render.film_transparent = True
	    render.use_compositing = False
	    render.use_sequencer = False
	    render.dither_intensity = 0.0

	    # Colour management.
	    scene.display_settings.display_device = "sRGB"
	    view = scene.view_settings
	    view.view_transform = "Standard"
	    if hasattr(view, "look"):
	        try:
	            view.look = "Medium High Contrast"
	        except TypeError:
	            # The look name is not accepted by this build; keep the default.
	            pass
	    view.exposure = 0.0
	    view.gamma = 1.0

	    if engine != "CYCLES":
	        return

	    cycles = scene.cycles
	    cycles.device = "GPU"
	    cycles.samples = int(samples)
	    cycles.filter_type = "GAUSSIAN"
	    cycles.filter_width = 1.5
	    cycles.diffuse_bounces = 1
	    cycles.glossy_bounces = 1
	    cycles.transparent_max_bounces = 8
	    cycles.transmission_bounces = 8
	    # The Blender build available on HF Spaces is compiled without
	    # OpenImageDenoise. Enabling Cycles denoising makes rendering abort
	    # with "Build without OpenImageDenoiser", so keep it off here.
	    cycles.use_denoising = False

	    # Best effort: select the first compute backend, in preference order, that
	    # the Cycles add-on reports a device for. Any failure is ignored.
	    try:
	        prefs = bpy.context.preferences.addons["cycles"].preferences
	        prefs.get_devices()
	        reported_types = {device.type for device in prefs.devices}
	        preferred = next(
	            (
	                backend
	                for backend in ("CUDA", "OPTIX", "HIP", "ONEAPI", "METAL")
	                if backend in reported_types
	            ),
	            None,
	        )
	        if preferred is not None:
	            prefs.compute_device_type = preferred
	    except Exception:
	        pass


	def init_lighting() -> None:
	    """Add five area lights aimed at the origin: one overhead and four sides.

	    The overhead light sits at (0, 0, 8). The side lights sit on the +X, -X,
	    +Y and -Y axes at distance 5 and height 3; the two X lights use 0.6x and
	    1.4x of the base energy, the two Y lights use the base energy.
	    """

	    def aim_at_origin(lamp: bpy.types.Object) -> None:
	        # Point the object's local -Z axis at the world origin.
	        to_origin = Vector((0.0, 0.0, 0.0)) - Vector(lamp.location)
	        if to_origin.length == 0:
	            return
	        to_origin.normalize()
	        reference_up = Vector((0.0, 0.0, 1.0))
	        if abs(to_origin.dot(reference_up)) > 0.999:
	            # Nearly vertical view direction: switch reference to keep the
	            # cross product well defined.
	            reference_up = Vector((0.0, 1.0, 0.0))
	        side = to_origin.cross(reference_up)
	        side.normalize()
	        true_up = side.cross(to_origin)
	        true_up.normalize()
	        basis = Matrix(
	            (
	                (side.x, true_up.x, -to_origin.x),
	                (side.y, true_up.y, -to_origin.y),
	                (side.z, true_up.z, -to_origin.z),
	            )
	        ).to_4x4()
	        lamp.rotation_mode = "XYZ"
	        lamp.rotation_euler = (Matrix.Rotation(0, 4, "X") @ basis).to_euler("XYZ")

	    def add_area_light(
	        name: str,
	        location: tuple[float, float, float],
	        energy: float,
	        size: float,
	    ) -> None:
	        # Create, link, configure and aim one area light.
	        lamp = bpy.data.objects.new(name, bpy.data.lights.new(name, type="AREA"))
	        bpy.context.collection.objects.link(lamp)
	        lamp.data.energy = energy
	        lamp.location = location
	        lamp.data.size = size
	        aim_at_origin(lamp)

	    add_area_light("Top_Light", (0.0, 0.0, 8.0), 1500, 3.0)

	    light_distance = 5.0
	    light_height = 3.0
	    light_energy_base = 1200.0
	    light_size = 2.0
	    side_lights = (
	        ("Light_X_Pos", (light_distance, 0.0, light_height), light_energy_base * 0.6),
	        ("Light_X_Neg", (-light_distance, 0.0, light_height), light_energy_base * 1.4),
	        ("Light_Y_Pos", (0.0, light_distance, light_height), light_energy_base),
	        ("Light_Y_Neg", (0.0, -light_distance, light_height), light_energy_base),
	    )
	    for light_name, light_location, light_energy in side_lights:
	        add_area_light(light_name, light_location, light_energy, light_size)


	def get_scene_root_objects() -> list[bpy.types.Object]:
	    """Return every object of the current scene that has no parent."""
	    roots = []
	    for candidate in bpy.context.scene.objects.values():
	        if candidate.parent:
	            continue
	        roots.append(candidate)
	    return roots


	def get_scene_meshes() -> list[bpy.types.Object]:
	    """Return every object of the current scene whose data block is a mesh."""
	    meshes = []
	    for candidate in bpy.context.scene.objects.values():
	        if isinstance(candidate.data, bpy.types.Mesh):
	            meshes.append(candidate)
	    return meshes


	def create_render_root() -> bpy.types.Object:
	    """Create an empty and parent all unparented non-camera/light objects to it.

	    Each adopted object's ``matrix_parent_inverse`` is set to the inverse of
	    the new root's world matrix. Returns the new root object.
	    """
	    scene = bpy.context.scene
	    root = bpy.data.objects.new("AutoKinematicsRenderRoot", None)
	    scene.collection.objects.link(root)

	    # Snapshot the adoptees first; the freshly linked root itself is left out.
	    adoptees = [
	        candidate
	        for candidate in scene.objects.values()
	        if candidate.parent is None
	        and candidate.type not in {"CAMERA", "LIGHT"}
	        and candidate != root
	    ]
	    for child in adoptees:
	        child.parent = root
	        child.matrix_parent_inverse = root.matrix_world.inverted()
	    return root


	def rotation_matrix_for_up(up_dir: str) -> Matrix:
	    """Return the 4x4 form of the ``UP_DIR_ROTATIONS`` entry for ``up_dir``.

	    ``up_dir`` is first normalized with ``canonicalize_up_dir``, which raises
	    ``ValueError`` for an unknown label.
	    """
	    axis_key = canonicalize_up_dir(up_dir)
	    rotation_rows = UP_DIR_ROTATIONS[axis_key]
	    return Matrix(rotation_rows).to_4x4()


	def import_basis_to_blender(mesh_path: Path) -> Matrix:
	    """Return the basis change assumed to be applied when importing the file.

	    For ``.glb``/``.gltf`` files this is the ``"+Y"`` up rotation; for every
	    other extension it is the 4x4 identity.
	    """
	    extension = mesh_path.suffix.lower().lstrip(".")
	    is_gltf = extension in {"glb", "gltf"}
	    return rotation_matrix_for_up("+Y") if is_gltf else Matrix.Identity(4)


	def orient_and_normalize(
	    root: bpy.types.Object,
	    *,
	    up_dir: str,
	    import_basis: Matrix,
	) -> None:
	    """Rotate ``root`` upright, then center and uniformly scale the scene.

	    After the call the mesh bounding box reported by ``scene_bbox`` for the
	    rotated scene has been centered on the origin and scaled so that its
	    longest edge is 1 (the edge length is clamped below at 1e-6).

	    Args:
	        root: parent object whose world matrix is overwritten.
	        up_dir: source up-axis label understood by ``rotation_matrix_for_up``.
	        import_basis: basis change to undo before applying the up rotation.
	    """
	    # Keep auto-kinematic renders in the same selected-upright frame as the
	    # upright orientation picker: undo Blender's glTF import basis, then apply
	    # the user-selected source-up -> +Z rotation.
	    upright = rotation_matrix_for_up(up_dir) @ import_basis.inverted()
	    root.matrix_world = upright
	    bpy.context.view_layer.update()

	    # Measure the rotated scene, then fold centering and scaling into the root.
	    lower, upper = scene_bbox()
	    midpoint = (lower + upper) * 0.5
	    size = upper - lower
	    longest_edge = max(float(size.x), float(size.y), float(size.z), 1e-6)
	    uniform = 1.0 / longest_edge
	    root.matrix_world = (
	        Matrix.Diagonal((uniform, uniform, uniform, 1.0))
	        @ Matrix.Translation(-midpoint)
	        @ upright
	    )
	    bpy.context.view_layer.update()


	def scene_bbox() -> tuple[Vector, Vector]:
	    """Return the world-space (min corner, max corner) over all scene meshes.

	    The box is accumulated from each mesh object's ``bound_box`` corners
	    transformed by that object's ``matrix_world``.

	    Raises:
	        RuntimeError: if the scene contains no mesh objects.
	    """
	    meshes = get_scene_meshes()
	    if not meshes:
	        raise RuntimeError("No mesh objects found in the scene")

	    lows = [math.inf] * 3
	    highs = [-math.inf] * 3
	    for mesh_obj in meshes:
	        to_world = mesh_obj.matrix_world
	        for corner in mesh_obj.bound_box:
	            point = to_world @ Vector(corner)
	            for axis, value in enumerate(point):
	                lows[axis] = min(lows[axis], value)
	                highs[axis] = max(highs[axis], value)
	    return Vector(lows), Vector(highs)


	def normalize_scene() -> None:
	    """Scale the scene so its longest bounding-box edge is 1, then center it.

	    When the scene has more than one unparented object they are first grouped
	    under a new ``"ParentEmpty"`` object. The remaining root object(s) are
	    scaled uniformly, the bounding box is measured again and the roots are
	    translated so the box is centered on the origin. Finally the object named
	    ``"Camera"`` is detached from any parent.

	    Raises:
	        RuntimeError: propagated from ``scene_bbox`` when the scene has no mesh.
	    """
	    root_objects = get_scene_root_objects()
	    if len(root_objects) > 1:
	        parent_empty = bpy.data.objects.new("ParentEmpty", None)
	        bpy.context.scene.collection.objects.link(parent_empty)
	        for obj in root_objects:
	            if obj != parent_empty:
	                obj.parent = parent_empty

	    bbox_min, bbox_max = scene_bbox()
	    # Clamp the extent exactly like orient_and_normalize does, so a scene whose
	    # bounding box has zero size cannot trigger a ZeroDivisionError.
	    max_extent = max(max(bbox_max - bbox_min), 1e-6)
	    scale = 1.0 / max_extent
	    for obj in get_scene_root_objects():
	        obj.scale = obj.scale * scale

	    # World matrices must be refreshed before measuring the scaled scene.
	    bpy.context.view_layer.update()
	    bbox_min, bbox_max = scene_bbox()
	    offset = -(bbox_min + bbox_max) / 2
	    for obj in get_scene_root_objects():
	        obj.matrix_world.translation += offset

	    bpy.ops.object.select_all(action="DESELECT")
	    # The camera must not stay attached to the grouping empty created above.
	    bpy.data.objects["Camera"].parent = None
	    bpy.context.view_layer.update()


	def configure_camera(camera: bpy.types.Object, *, resolution: int) -> np.ndarray:
	    """Set up a 50 mm perspective lens on a 36x36 sensor and return intrinsics.

	    Returns:
	        A 3x3 ``float32`` matrix ``[[fx, 0, cx], [0, fy, cy], [0, 0, 1]]`` in
	        pixels for a square image of ``resolution`` pixels, with the principal
	        point at ``(resolution - 1) / 2`` on both axes.
	    """
	    lens_data = camera.data
	    lens_data.type = "PERSP"
	    lens_data.lens_unit = "MILLIMETERS"
	    lens_data.sensor_width = 36.0
	    lens_data.sensor_height = 36.0
	    lens_data.sensor_fit = "HORIZONTAL"
	    lens_data.lens = 50.0

	    edge = float(resolution)
	    focal_x = edge * float(lens_data.lens) / float(lens_data.sensor_width)
	    focal_y = edge * float(lens_data.lens) / float(lens_data.sensor_height)
	    principal = 0.5 * (edge - 1.0)

	    intrinsic = np.zeros((3, 3), dtype=np.float32)
	    intrinsic[0, 0] = focal_x
	    intrinsic[1, 1] = focal_y
	    intrinsic[0, 2] = principal
	    intrinsic[1, 2] = principal
	    intrinsic[2, 2] = 1.0
	    return intrinsic


	def set_camera_pose(
	    camera: bpy.types.Object,
	    *,
	    camera_distance: float,
	    azimuth_deg: float,
	    pitch_deg: float,
	) -> None:
	    """Place the camera on a sphere around the origin and aim it at the origin.

	    Args:
	        camera: object whose location and XYZ Euler rotation are overwritten.
	        camera_distance: sphere radius.
	        azimuth_deg: angle around +Z; 0 puts the camera on the +Y side and
	            90 on the +X side.
	        pitch_deg: angle measured from the +Z axis; 0 puts the camera straight
	            above the origin.
	    """
	    radius = float(camera_distance)
	    azimuth = math.radians(float(azimuth_deg))
	    pitch = math.radians(float(pitch_deg))
	    ground_radius = radius * math.sin(pitch)
	    camera.location = Vector(
	        (
	            ground_radius * math.sin(azimuth),
	            ground_radius * math.cos(azimuth),
	            radius * math.cos(pitch),
	        )
	    )

	    view_dir = Vector((0.0, 0.0, 0.0)) - camera.location
	    if view_dir.length == 0:
	        # Camera sits on the target: fall back to looking straight down.
	        view_dir = Vector((0.0, 0.0, -1.0))
	    view_dir.normalize()

	    world_up = Vector((0.0, 0.0, 1.0))
	    nearly_vertical = abs(view_dir.dot(world_up)) > 0.999
	    up_hint = Vector((0.0, 1.0, 0.0)) if nearly_vertical else world_up
	    right = view_dir.cross(up_hint)
	    right.normalize()
	    camera_up = right.cross(view_dir)
	    camera_up.normalize()
	    if camera_up.dot(world_up) < 0:
	        # Keep the image upright with respect to world +Z.
	        right = -right
	        camera_up = -camera_up

	    # Columns of the rotation are the camera's right, up and backward axes.
	    backward = -view_dir
	    orientation = Matrix(
	        tuple((right[axis], camera_up[axis], backward[axis]) for axis in range(3))
	    )
	    camera.rotation_mode = "XYZ"
	    camera.rotation_euler = orientation.to_euler("XYZ")


	def get_world_to_camera(camera: bpy.types.Object) -> np.ndarray:
	    """Return the 4x4 ``float32`` rigid world-to-camera transform.

	    Only the location and rotation parts of ``camera.matrix_world`` are used;
	    the rotation block is the transposed camera rotation and the translation
	    column is the negated, rotated camera location.
	    """
	    position, orientation = camera.matrix_world.decompose()[0:2]
	    world_to_cam_rotation = orientation.to_matrix().transposed()
	    world_to_cam_shift = -(world_to_cam_rotation @ position)
	    rows = [
	        [*world_to_cam_rotation[axis], float(world_to_cam_shift[axis])]
	        for axis in range(3)
	    ]
	    rows.append([0.0, 0.0, 0.0, 1.0])
	    return np.asarray(rows, dtype=np.float32)


	def rasterize_view(
	    camera: bpy.types.Object,
	    *,
	    intrinsic: np.ndarray,
	    resolution: int,
	) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
	    """Ray-cast one ray per pixel from the camera and collect per-pixel hits.

	    Args:
	        camera: camera object whose ``matrix_world`` defines the rays.
	        intrinsic: 3x3 matrix providing fx, fy, cx and cy.
	        resolution: edge length of the square pixel grid.

	    Returns:
	        ``(camera_to_world, world_to_camera, face_ids, hit_points, normals,
	        depth)``. The four per-pixel arrays are indexed ``[row, column]``.
	        ``face_ids`` is ``int32`` with 0 where a ray hit anything and -1
	        elsewhere. ``hit_points`` and ``normals`` are ``float32`` world-space
	        triples and ``depth`` is the third camera-space coordinate of the hit;
	        all three are NaN where nothing was hit.
	        NOTE(review): rays run along the camera's negative local Z, so depth
	        values of hits in front of the camera are presumably negative --
	        confirm against the consumer of these files.
	    """
	    camera_to_world = matrix_to_numpy(camera.matrix_world)
	    world_to_camera = get_world_to_camera(camera)
	    focal_x, focal_y = float(intrinsic[0, 0]), float(intrinsic[1, 1])
	    center_x, center_y = float(intrinsic[0, 2]), float(intrinsic[1, 2])
	    width = int(resolution)
	    height = int(resolution)
	    pixel_count = height * width

	    # Unit ray directions in the camera frame; image rows grow downwards, so
	    # the row coordinate is negated to obtain the camera's up component.
	    cols, rows = np.meshgrid(
	        np.arange(width, dtype=np.float64),
	        np.arange(height, dtype=np.float64),
	    )
	    ray_x = (cols - center_x) / focal_x
	    ray_y = -(rows - center_y) / focal_y
	    rays_camera = np.stack(
	        [ray_x, ray_y, np.ones_like(ray_x)],
	        axis=-1,
	    ).reshape(-1, 3)
	    rays_camera /= np.linalg.norm(rays_camera, axis=-1, keepdims=True)

	    # Express the rays in world space using the camera's axes.
	    axis_right = camera_to_world[:3, 0]
	    axis_up = camera_to_world[:3, 1]
	    axis_forward = -camera_to_world[:3, 2]
	    rays_world = (
	        rays_camera[:, 0:1] * axis_right[None, :]
	        + rays_camera[:, 1:2] * axis_up[None, :]
	        + rays_camera[:, 2:3] * axis_forward[None, :]
	    )
	    rays_world /= np.linalg.norm(rays_world, axis=-1, keepdims=True)
	    eye_position = np.asarray(camera_to_world[:3, 3], dtype=np.float64)

	    # Outputs start as "no hit" and are filled in per ray.
	    face_ids = np.full((pixel_count,), -1, dtype=np.int32)
	    hit_points = np.full((pixel_count, 3), np.nan, dtype=np.float32)
	    normals = np.full((pixel_count, 3), np.nan, dtype=np.float32)
	    depth = np.full((pixel_count,), np.nan, dtype=np.float32)

	    depsgraph = bpy.context.evaluated_depsgraph_get()
	    eye = Vector(eye_position.tolist())
	    scene = bpy.context.scene
	    for pixel, ray in enumerate(rays_world):
	        hit, location, normal, _index, _object, _matrix = scene.ray_cast(
	            depsgraph,
	            eye,
	            Vector(ray.tolist()),
	        )
	        if not hit:
	            continue
	        face_ids[pixel] = 0
	        point = np.asarray((location.x, location.y, location.z), dtype=np.float32)
	        surface_normal = np.asarray((normal.x, normal.y, normal.z), dtype=np.float32)
	        normal_length = float(np.linalg.norm(surface_normal))
	        if normal_length > 1e-8:
	            surface_normal = surface_normal / normal_length
	        hit_points[pixel] = point
	        normals[pixel] = surface_normal
	        depth[pixel] = float((world_to_camera[:3, :3] @ point.astype(np.float64) + world_to_camera[:3, 3])[2])

	    return (
	        camera_to_world,
	        world_to_camera,
	        face_ids.reshape(height, width),
	        hit_points.reshape(height, width, 3),
	        normals.reshape(height, width, 3),
	        depth.reshape(height, width),
	    )


	def main() -> None:
	    """Render every requested azimuth and save an image plus camera payload.

	    For view ``i`` the render is written to ``view_{i:03d}.png`` and the
	    intrinsics, extrinsics and per-pixel ray-cast buffers to
	    ``view_{i:03d}_camera.npz`` inside the output directory. When no up
	    direction was given the scene is normalized with ``normalize_scene`` and
	    the per-pixel buffers are flipped before saving; otherwise the scene is
	    oriented with ``orient_and_normalize`` and the buffers are saved as is.
	    """
	    args = parse_args()
	    resolution = int(args.resolution)
	    pitch_deg = float(args.pitch_deg)
	    camera_distance = float(args.camera_distance)
	    view_count = len(args.azimuths)

	    output_dir = args.output_dir.resolve()
	    output_dir.mkdir(parents=True, exist_ok=True)

	    camera = reset_scene()
	    init_render(
	        engine=str(args.engine),
	        resolution=resolution,
	        samples=int(args.samples),
	    )
	    mesh_path = args.mesh_path.resolve()
	    load_object(mesh_path)

	    selected_up_dir = str(args.up_dir).strip()
	    if not selected_up_dir:
	        normalize_scene()
	    else:
	        orient_and_normalize(
	            create_render_root(),
	            up_dir=canonicalize_up_dir(selected_up_dir),
	            import_basis=import_basis_to_blender(mesh_path),
	        )
	    init_lighting()
	    intrinsic = configure_camera(camera, resolution=resolution)

	    # Per-pixel buffers are flipped only on the path without an up direction.
	    flip_payload = not selected_up_dir

	    for image_id, azimuth_deg in enumerate(args.azimuths):
	        set_camera_pose(
	            camera,
	            camera_distance=camera_distance,
	            azimuth_deg=float(azimuth_deg),
	            pitch_deg=pitch_deg,
	        )
	        bpy.context.view_layer.update()

	        image_path = output_dir / f"view_{image_id:03d}.png"
	        camera_path = output_dir / f"view_{image_id:03d}_camera.npz"
	        bpy.context.scene.render.filepath = str(image_path)
	        bpy.ops.render.render(write_still=True)

	        camera_to_world, world_to_camera, face_ids, hit_points, normals, depth = rasterize_view(
	            camera,
	            intrinsic=intrinsic,
	            resolution=resolution,
	        )
	        # name -> (buffer, dtype stored in the archive), in archive order.
	        raster_buffers = {
	            "face_ids": (face_ids, np.int32),
	            "hit_points": (hit_points, np.float32),
	            "normals": (normals, np.float32),
	            "depth": (depth, np.float32),
	        }
	        oriented_buffers = {
	            key: orient_saved_raster_payload(values, flip=flip_payload).astype(dtype)
	            for key, (values, dtype) in raster_buffers.items()
	        }
	        np.savez_compressed(
	            camera_path,
	            intrinsic=intrinsic.astype(np.float32),
	            camera_to_world=camera_to_world.astype(np.float32),
	            world_to_camera=world_to_camera.astype(np.float32),
	            **oriented_buffers,
	            azimuth_deg=np.float32(azimuth_deg),
	            elevation_deg=np.float32(90.0 - pitch_deg),
	            pitch_deg=np.float32(args.pitch_deg),
	        )
	        print(
	            f"Rendered auto-kinematics Blender view {image_id + 1}/{view_count} "
	            f"(azimuth={float(azimuth_deg):.1f}, pitch={pitch_deg:.1f})"
	        )


	# Run the render pipeline only when this file is executed as a script, not
	# when it is imported as a module.
	if __name__ == "__main__":
	    main()