#!/usr/bin/env python3 """Render auto-kinematics mesh views inside headless Blender.""" from __future__ import annotations import argparse import math import sys from pathlib import Path import numpy as np # Blender 3.4's bundled glTF importer still references NumPy aliases removed in # newer NumPy builds. Install them before bpy imports or glTF loading can run. np.bool = bool np.int = int np.float = float np.complex = complex np.object = object import bpy from mathutils import Matrix, Vector IMPORT_FUNCTIONS = { "obj": bpy.ops.wm.obj_import, "glb": bpy.ops.import_scene.gltf, "gltf": bpy.ops.import_scene.gltf, "fbx": bpy.ops.import_scene.fbx, "stl": bpy.ops.wm.stl_import, "dae": bpy.ops.wm.collada_import, "ply": bpy.ops.wm.ply_import, } UP_DIR_ROTATIONS = { "+X": ((0.0, 0.0, -1.0), (0.0, 1.0, 0.0), (1.0, 0.0, 0.0)), "-X": ((0.0, 0.0, 1.0), (0.0, 1.0, 0.0), (-1.0, 0.0, 0.0)), "+Y": ((1.0, 0.0, 0.0), (0.0, 0.0, -1.0), (0.0, 1.0, 0.0)), "-Y": ((1.0, 0.0, 0.0), (0.0, 0.0, 1.0), (0.0, -1.0, 0.0)), "+Z": ((1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0)), "-Z": ((1.0, 0.0, 0.0), (0.0, -1.0, 0.0), (0.0, 0.0, -1.0)), } def canonicalize_up_dir(up_dir: str) -> str: token = str(up_dir).strip().upper() if token in {"X", "Y", "Z"}: token = f"+{token}" if token not in UP_DIR_ROTATIONS: raise ValueError(f"Invalid up direction: {up_dir}") return token def orient_saved_raster_payload(buffer: np.ndarray, *, flip: bool) -> np.ndarray: """Match the final saved image orientation for per-pixel render payloads.""" oriented = np.asarray(buffer) if flip: oriented = np.flip(np.flip(oriented, axis=0), axis=1) return oriented.copy() def parse_args() -> argparse.Namespace: argv = sys.argv[sys.argv.index("--") + 1 :] if "--" in sys.argv else [] parser = argparse.ArgumentParser() parser.add_argument("--mesh-path", type=Path, required=True) parser.add_argument("--output-dir", type=Path, required=True) parser.add_argument("--resolution", type=int, required=True) parser.add_argument("--camera-distance", type=float, required=True) parser.add_argument("--pitch-deg", type=float, required=True) parser.add_argument("--engine", type=str, default="CYCLES") parser.add_argument("--samples", type=int, default=8) parser.add_argument("--azimuths", type=float, nargs="+", required=True) parser.add_argument("--up-dir", type=str, default="") return parser.parse_args(argv) def load_object(mesh_path: Path) -> None: suffix = mesh_path.suffix.lower().lstrip(".") if suffix not in IMPORT_FUNCTIONS: raise ValueError(f"Unsupported mesh type for Blender auto-kinematics render: {mesh_path.suffix}") importer = IMPORT_FUNCTIONS[suffix] if suffix in {"glb", "gltf"}: try: result = importer(filepath=str(mesh_path), merge_vertices=False) except TypeError: result = importer(filepath=str(mesh_path)) elif suffix == "obj": result = importer(filepath=str(mesh_path), forward_axis="Y", up_axis="Z") else: result = importer(filepath=str(mesh_path)) if "FINISHED" not in result: raise RuntimeError(f"Blender failed to import mesh: {mesh_path}") bpy.context.view_layer.update() for material in bpy.data.materials: material.use_backface_culling = True def matrix_to_numpy(matrix: object) -> np.ndarray: return np.array([list(row) for row in matrix], dtype=np.float32) def reset_scene() -> bpy.types.Object: bpy.ops.wm.read_factory_settings(use_empty=True) bpy.ops.object.camera_add() camera = bpy.context.active_object camera.name = "Camera" bpy.context.scene.camera = camera return camera def init_render(*, engine: str, resolution: int, samples: int) -> None: scene = bpy.context.scene scene.render.engine = engine scene.render.resolution_x = int(resolution) scene.render.resolution_y = int(resolution) scene.render.resolution_percentage = 100 scene.render.image_settings.file_format = "PNG" scene.render.image_settings.color_mode = "RGBA" scene.render.film_transparent = True scene.render.use_compositing = False scene.render.use_sequencer = False scene.render.dither_intensity = 0.0 scene.display_settings.display_device = "sRGB" scene.view_settings.view_transform = "Standard" if hasattr(scene.view_settings, "look"): try: scene.view_settings.look = "Medium High Contrast" except TypeError: pass scene.view_settings.exposure = 0.0 scene.view_settings.gamma = 1.0 if engine == "CYCLES": scene.cycles.device = "GPU" scene.cycles.samples = int(samples) scene.cycles.filter_type = "GAUSSIAN" scene.cycles.filter_width = 1.5 scene.cycles.diffuse_bounces = 1 scene.cycles.glossy_bounces = 1 scene.cycles.transparent_max_bounces = 8 scene.cycles.transmission_bounces = 8 # The Blender build available on HF Spaces is compiled without # OpenImageDenoise. Enabling Cycles denoising makes rendering abort # with "Build without OpenImageDenoiser", so keep it off here. scene.cycles.use_denoising = False try: prefs = bpy.context.preferences.addons["cycles"].preferences prefs.get_devices() available_types = {device.type for device in prefs.devices} for device_type in ("CUDA", "OPTIX", "HIP", "ONEAPI", "METAL"): if device_type in available_types: prefs.compute_device_type = device_type break except Exception: pass def init_lighting() -> None: def look_at_origin(light_obj: bpy.types.Object) -> None: target = Vector((0.0, 0.0, 0.0)) direction = target - Vector(light_obj.location) if direction.length == 0: return direction.normalize() up = Vector((0.0, 0.0, 1.0)) if abs(direction.dot(up)) > 0.999: up = Vector((0.0, 1.0, 0.0)) right = direction.cross(up) right.normalize() corrected_up = right.cross(direction) corrected_up.normalize() rotation_matrix = Matrix( ( (right.x, corrected_up.x, -direction.x), (right.y, corrected_up.y, -direction.y), (right.z, corrected_up.z, -direction.z), ) ).to_4x4() light_obj.rotation_mode = "XYZ" light_obj.rotation_euler = (Matrix.Rotation(0, 4, "X") @ rotation_matrix).to_euler("XYZ") top_light = bpy.data.objects.new( "Top_Light", bpy.data.lights.new("Top_Light", type="AREA"), ) bpy.context.collection.objects.link(top_light) top_light.data.energy = 1500 top_light.location = (0.0, 0.0, 8.0) top_light.data.size = 3.0 look_at_origin(top_light) light_distance = 5.0 light_height = 3.0 light_energy_base = 1200.0 light_size = 2.0 side_specs = ( ("Light_X_Pos", (light_distance, 0.0, light_height), light_energy_base * 0.6), ("Light_X_Neg", (-light_distance, 0.0, light_height), light_energy_base * 1.4), ("Light_Y_Pos", (0.0, light_distance, light_height), light_energy_base), ("Light_Y_Neg", (0.0, -light_distance, light_height), light_energy_base), ) for name, location, energy in side_specs: light = bpy.data.objects.new(name, bpy.data.lights.new(name, type="AREA")) bpy.context.collection.objects.link(light) light.data.energy = energy light.location = location light.data.size = light_size look_at_origin(light) def get_scene_root_objects() -> list[bpy.types.Object]: return [obj for obj in bpy.context.scene.objects.values() if not obj.parent] def get_scene_meshes() -> list[bpy.types.Object]: return [obj for obj in bpy.context.scene.objects.values() if isinstance(obj.data, bpy.types.Mesh)] def create_render_root() -> bpy.types.Object: root = bpy.data.objects.new("AutoKinematicsRenderRoot", None) bpy.context.scene.collection.objects.link(root) imported_roots = [ obj for obj in bpy.context.scene.objects.values() if obj.parent is None and obj.type not in {"CAMERA", "LIGHT"} ] for obj in imported_roots: if obj == root: continue obj.parent = root obj.matrix_parent_inverse = root.matrix_world.inverted() return root def rotation_matrix_for_up(up_dir: str) -> Matrix: return Matrix(UP_DIR_ROTATIONS[canonicalize_up_dir(up_dir)]).to_4x4() def import_basis_to_blender(mesh_path: Path) -> Matrix: suffix = mesh_path.suffix.lower().lstrip(".") if suffix in {"glb", "gltf"}: return rotation_matrix_for_up("+Y") return Matrix.Identity(4) def orient_and_normalize( root: bpy.types.Object, *, up_dir: str, import_basis: Matrix, ) -> None: # Keep auto-kinematic renders in the same selected-upright frame as the # upright orientation picker: undo Blender's glTF import basis, then apply # the user-selected source-up -> +Z rotation. rotation = rotation_matrix_for_up(up_dir) @ import_basis.inverted() root.matrix_world = rotation bpy.context.view_layer.update() bbox_min, bbox_max = scene_bbox() center = (bbox_min + bbox_max) * 0.5 extent = bbox_max - bbox_min max_extent = max(float(extent.x), float(extent.y), float(extent.z), 1e-6) scale = 1.0 / max_extent scale_matrix = Matrix.Diagonal((scale, scale, scale, 1.0)) center_matrix = Matrix.Translation(-center) root.matrix_world = scale_matrix @ center_matrix @ rotation bpy.context.view_layer.update() def scene_bbox() -> tuple[Vector, Vector]: bbox_min = (math.inf,) * 3 bbox_max = (-math.inf,) * 3 found = False for obj in get_scene_meshes(): found = True for coord in obj.bound_box: world_coord = obj.matrix_world @ Vector(coord) bbox_min = tuple(min(x, y) for x, y in zip(bbox_min, world_coord)) bbox_max = tuple(max(x, y) for x, y in zip(bbox_max, world_coord)) if not found: raise RuntimeError("No mesh objects found in the scene") return Vector(bbox_min), Vector(bbox_max) def normalize_scene() -> None: root_objects = get_scene_root_objects() if len(root_objects) > 1: parent_empty = bpy.data.objects.new("ParentEmpty", None) bpy.context.scene.collection.objects.link(parent_empty) for obj in root_objects: if obj != parent_empty: obj.parent = parent_empty bbox_min, bbox_max = scene_bbox() scale = 1.0 / max(bbox_max - bbox_min) for obj in get_scene_root_objects(): obj.scale = obj.scale * scale bpy.context.view_layer.update() bbox_min, bbox_max = scene_bbox() offset = -(bbox_min + bbox_max) / 2 for obj in get_scene_root_objects(): obj.matrix_world.translation += offset bpy.ops.object.select_all(action="DESELECT") bpy.data.objects["Camera"].parent = None bpy.context.view_layer.update() def configure_camera(camera: bpy.types.Object, *, resolution: int) -> np.ndarray: camera.data.type = "PERSP" camera.data.lens_unit = "MILLIMETERS" camera.data.sensor_width = 36.0 camera.data.sensor_height = 36.0 camera.data.sensor_fit = "HORIZONTAL" camera.data.lens = 50.0 fx = float(resolution) * float(camera.data.lens) / float(camera.data.sensor_width) fy = float(resolution) * float(camera.data.lens) / float(camera.data.sensor_height) cx = 0.5 * (float(resolution) - 1.0) cy = 0.5 * (float(resolution) - 1.0) return np.asarray( [ [fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0], ], dtype=np.float32, ) def set_camera_pose( camera: bpy.types.Object, *, camera_distance: float, azimuth_deg: float, pitch_deg: float, ) -> None: azimuth = math.radians(float(azimuth_deg)) pitch = math.radians(float(pitch_deg)) horizontal_distance = float(camera_distance) * math.sin(pitch) y = horizontal_distance * math.cos(azimuth) x = horizontal_distance * math.sin(azimuth) z = float(camera_distance) * math.cos(pitch) camera.location = Vector((x, y, z)) target = Vector((0.0, 0.0, 0.0)) forward = target - camera.location if forward.length == 0: forward = Vector((0.0, 0.0, -1.0)) forward.normalize() up = Vector((0.0, 0.0, 1.0)) if abs(forward.dot(up)) > 0.999: up = Vector((0.0, 1.0, 0.0)) right = forward.cross(up) right.normalize() corrected_up = right.cross(forward) corrected_up.normalize() world_up = Vector((0.0, 0.0, 1.0)) if corrected_up.dot(world_up) < 0: right = -right corrected_up = -corrected_up rotation_matrix = Matrix( ( (right.x, corrected_up.x, (-forward).x), (right.y, corrected_up.y, (-forward).y), (right.z, corrected_up.z, (-forward).z), ) ) camera.rotation_mode = "XYZ" camera.rotation_euler = rotation_matrix.to_euler("XYZ") def get_world_to_camera(camera: bpy.types.Object) -> np.ndarray: location, rotation = camera.matrix_world.decompose()[0:2] rotation_world_to_camera = rotation.to_matrix().transposed() translation_world_to_camera = -(rotation_world_to_camera @ location) return np.asarray( [ list(rotation_world_to_camera[0]) + [float(translation_world_to_camera[0])], list(rotation_world_to_camera[1]) + [float(translation_world_to_camera[1])], list(rotation_world_to_camera[2]) + [float(translation_world_to_camera[2])], [0.0, 0.0, 0.0, 1.0], ], dtype=np.float32, ) def rasterize_view( camera: bpy.types.Object, *, intrinsic: np.ndarray, resolution: int, ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: camera_to_world = matrix_to_numpy(camera.matrix_world) world_to_camera = get_world_to_camera(camera) fx = float(intrinsic[0, 0]) fy = float(intrinsic[1, 1]) cx = float(intrinsic[0, 2]) cy = float(intrinsic[1, 2]) width = int(resolution) height = int(resolution) grid_x, grid_y = np.meshgrid( np.arange(width, dtype=np.float64), np.arange(height, dtype=np.float64), ) dir_x = (grid_x - cx) / fx dir_y = -(grid_y - cy) / fy directions_camera = np.stack( [dir_x, dir_y, np.ones_like(dir_x)], axis=-1, ).reshape(-1, 3) directions_camera /= np.linalg.norm(directions_camera, axis=-1, keepdims=True) right = camera_to_world[:3, 0] up = camera_to_world[:3, 1] forward = -camera_to_world[:3, 2] directions_world = ( directions_camera[:, 0:1] * right[None, :] + directions_camera[:, 1:2] * up[None, :] + directions_camera[:, 2:3] * forward[None, :] ) directions_world /= np.linalg.norm(directions_world, axis=-1, keepdims=True) camera_origin = np.asarray(camera_to_world[:3, 3], dtype=np.float64) flat_face_ids = np.full((height * width,), -1, dtype=np.int32) flat_hit_points = np.full((height * width, 3), np.nan, dtype=np.float32) flat_normals = np.full((height * width, 3), np.nan, dtype=np.float32) flat_depth = np.full((height * width,), np.nan, dtype=np.float32) depsgraph = bpy.context.evaluated_depsgraph_get() origin_vec = Vector(camera_origin.tolist()) for ray_idx, direction in enumerate(directions_world): hit, location, normal, _, _, _ = bpy.context.scene.ray_cast( depsgraph, origin_vec, Vector(direction.tolist()), ) if not hit: continue flat_face_ids[ray_idx] = 0 hit_point = np.asarray((location.x, location.y, location.z), dtype=np.float32) hit_normal = np.asarray((normal.x, normal.y, normal.z), dtype=np.float32) normal_norm = float(np.linalg.norm(hit_normal)) if normal_norm > 1e-8: hit_normal = hit_normal / normal_norm flat_hit_points[ray_idx] = hit_point flat_normals[ray_idx] = hit_normal flat_depth[ray_idx] = float((world_to_camera[:3, :3] @ hit_point.astype(np.float64) + world_to_camera[:3, 3])[2]) return ( camera_to_world, world_to_camera, flat_face_ids.reshape(height, width), flat_hit_points.reshape(height, width, 3), flat_normals.reshape(height, width, 3), flat_depth.reshape(height, width), ) def main() -> None: args = parse_args() output_dir = args.output_dir.resolve() output_dir.mkdir(parents=True, exist_ok=True) camera = reset_scene() init_render( engine=str(args.engine), resolution=int(args.resolution), samples=int(args.samples), ) mesh_path = args.mesh_path.resolve() load_object(mesh_path) selected_up_dir = str(args.up_dir).strip() if selected_up_dir: root = create_render_root() orient_and_normalize( root, up_dir=canonicalize_up_dir(selected_up_dir), import_basis=import_basis_to_blender(mesh_path), ) else: normalize_scene() init_lighting() intrinsic = configure_camera( camera, resolution=int(args.resolution), ) for image_id, azimuth_deg in enumerate(args.azimuths): set_camera_pose( camera, camera_distance=float(args.camera_distance), azimuth_deg=float(azimuth_deg), pitch_deg=float(args.pitch_deg), ) bpy.context.view_layer.update() image_path = output_dir / f"view_{image_id:03d}.png" camera_path = output_dir / f"view_{image_id:03d}_camera.npz" bpy.context.scene.render.filepath = str(image_path) bpy.ops.render.render(write_still=True) ( camera_to_world, world_to_camera, face_ids, hit_points, normals, depth, ) = rasterize_view( camera, intrinsic=intrinsic, resolution=int(args.resolution), ) np.savez_compressed( camera_path, intrinsic=intrinsic.astype(np.float32), camera_to_world=camera_to_world.astype(np.float32), world_to_camera=world_to_camera.astype(np.float32), face_ids=orient_saved_raster_payload(face_ids, flip=not selected_up_dir).astype(np.int32), hit_points=orient_saved_raster_payload(hit_points, flip=not selected_up_dir).astype(np.float32), normals=orient_saved_raster_payload(normals, flip=not selected_up_dir).astype(np.float32), depth=orient_saved_raster_payload(depth, flip=not selected_up_dir).astype(np.float32), azimuth_deg=np.float32(azimuth_deg), elevation_deg=np.float32(90.0 - float(args.pitch_deg)), pitch_deg=np.float32(args.pitch_deg), ) print( f"Rendered auto-kinematics Blender view {image_id + 1}/{len(args.azimuths)} " f"(azimuth={float(azimuth_deg):.1f}, pitch={float(args.pitch_deg):.1f})" ) if __name__ == "__main__": main()