#!/usr/bin/env python3
"""Render auto-kinematics mesh views inside headless Blender."""

from __future__ import annotations

import argparse
import math
import sys
from pathlib import Path

import numpy as np

# Blender 3.4's bundled glTF importer still references NumPy aliases that
# newer NumPy builds removed. Re-install them as the plain builtins before bpy
# is imported, so glTF loading can run.
for _alias_name, _builtin_type in (
    ("bool", bool),
    ("int", int),
    ("float", float),
    ("complex", complex),
    ("object", object),
):
    setattr(np, _alias_name, _builtin_type)
# Do not leave the loop temporaries behind in the module namespace.
del _alias_name, _builtin_type

import bpy
from mathutils import Matrix, Vector

# Maps a lowercase file extension (without the dot) to the Blender operator
# that imports it; load_object() looks importers up here by mesh suffix.
# NOTE(review): the wm.* importers are only present in sufficiently recent
# Blender builds — confirm each one exists in the deployed version.
IMPORT_FUNCTIONS = {
    "obj": bpy.ops.wm.obj_import,
    "glb": bpy.ops.import_scene.gltf,
    "gltf": bpy.ops.import_scene.gltf,
    "fbx": bpy.ops.import_scene.fbx,
    "stl": bpy.ops.wm.stl_import,
    "dae": bpy.ops.wm.collada_import,
    "ply": bpy.ops.wm.ply_import,
}


# 3x3 rotations (given as rows) keyed by the signed source axis that should
# end up pointing along +Z: each matrix maps its key's axis onto (0, 0, 1).
UP_DIR_ROTATIONS = {
    "+X": (
        (0.0, 0.0, -1.0),
        (0.0, 1.0, 0.0),
        (1.0, 0.0, 0.0),
    ),
    "-X": (
        (0.0, 0.0, 1.0),
        (0.0, 1.0, 0.0),
        (-1.0, 0.0, 0.0),
    ),
    "+Y": (
        (1.0, 0.0, 0.0),
        (0.0, 0.0, -1.0),
        (0.0, 1.0, 0.0),
    ),
    "-Y": (
        (1.0, 0.0, 0.0),
        (0.0, 0.0, 1.0),
        (0.0, -1.0, 0.0),
    ),
    "+Z": (
        (1.0, 0.0, 0.0),
        (0.0, 1.0, 0.0),
        (0.0, 0.0, 1.0),
    ),
    "-Z": (
        (1.0, 0.0, 0.0),
        (0.0, -1.0, 0.0),
        (0.0, 0.0, -1.0),
    ),
}


def canonicalize_up_dir(up_dir: str) -> str:
    """Return ``up_dir`` as one of the signed keys of ``UP_DIR_ROTATIONS``.

    The input is stringified, stripped and upper-cased; a bare axis letter
    (``X``, ``Y`` or ``Z``) is treated as the positive direction.

    Raises:
        ValueError: If the cleaned-up token is not a known up direction.
    """
    cleaned = str(up_dir).strip().upper()
    candidate = f"+{cleaned}" if cleaned in {"X", "Y", "Z"} else cleaned
    if candidate in UP_DIR_ROTATIONS:
        return candidate
    raise ValueError(f"Invalid up direction: {up_dir}")


def orient_saved_raster_payload(buffer: np.ndarray, *, flip: bool) -> np.ndarray:
    """Return a copy of ``buffer`` oriented like the final saved image.

    When ``flip`` is true the first two axes (rows and columns) are both
    reversed, i.e. the raster is rotated by 180 degrees; otherwise the data is
    copied unchanged. The result never shares memory with the input.
    """
    payload = np.asarray(buffer)
    if not flip:
        return payload.copy()
    return np.flip(payload, axis=(0, 1)).copy()


def parse_args() -> argparse.Namespace:
    """Parse the script options that follow the first ``--`` in ``sys.argv``.

    Everything up to and including the ``--`` separator is ignored. When no
    separator is present an empty argument list is parsed, so argparse reports
    the missing required options and exits.
    """
    try:
        separator_index = sys.argv.index("--")
    except ValueError:
        script_argv = []
    else:
        script_argv = sys.argv[separator_index + 1 :]

    parser = argparse.ArgumentParser()
    # Required single-value options, registered in their original order.
    required_scalars = (
        ("--mesh-path", Path),
        ("--output-dir", Path),
        ("--resolution", int),
        ("--camera-distance", float),
        ("--pitch-deg", float),
    )
    for flag, converter in required_scalars:
        parser.add_argument(flag, type=converter, required=True)
    parser.add_argument("--engine", type=str, default="CYCLES")
    parser.add_argument("--samples", type=int, default=8)
    parser.add_argument("--azimuths", type=float, nargs="+", required=True)
    # An empty --up-dir means "no explicit up direction selected".
    parser.add_argument("--up-dir", type=str, default="")
    return parser.parse_args(script_argv)


def load_object(mesh_path: Path) -> None:
    """Import ``mesh_path`` into the current scene using the matching importer.

    The importer is chosen from ``IMPORT_FUNCTIONS`` by the lowercase file
    suffix. After a successful import the view layer is refreshed and backface
    culling is switched on for every material in ``bpy.data.materials``.

    Raises:
        ValueError: If the file suffix has no registered importer.
        RuntimeError: If the import operator does not report ``FINISHED``.
    """
    extension = mesh_path.suffix.lower().lstrip(".")
    importer = IMPORT_FUNCTIONS.get(extension)
    if importer is None:
        raise ValueError(f"Unsupported mesh type for Blender auto-kinematics render: {mesh_path.suffix}")

    filepath = str(mesh_path)
    if extension == "obj":
        status = importer(filepath=filepath, forward_axis="Y", up_axis="Z")
    elif extension in {"glb", "gltf"}:
        try:
            status = importer(filepath=filepath, merge_vertices=False)
        except TypeError:
            # Retry without merge_vertices if the operator rejects that keyword.
            status = importer(filepath=filepath)
    else:
        status = importer(filepath=filepath)

    if "FINISHED" not in status:
        raise RuntimeError(f"Blender failed to import mesh: {mesh_path}")

    bpy.context.view_layer.update()
    for material in bpy.data.materials:
        material.use_backface_culling = True


def matrix_to_numpy(matrix: object) -> np.ndarray:
    """Copy a row-iterable matrix into a new ``float32`` NumPy array."""
    rows = [tuple(row) for row in matrix]
    return np.asarray(rows, dtype=np.float32)


def reset_scene() -> bpy.types.Object:
    """Reset Blender to factory settings and install a fresh scene camera.

    Returns:
        The newly added camera object, renamed ``"Camera"`` and assigned as
        the scene's render camera.
    """
    # use_empty=True requests an empty scene rather than the default startup file.
    bpy.ops.wm.read_factory_settings(use_empty=True)
    bpy.ops.object.camera_add()
    # The object added by camera_add is picked up as the active object.
    camera = bpy.context.active_object
    # normalize_scene() later looks this object up by the name "Camera".
    camera.name = "Camera"
    bpy.context.scene.camera = camera
    return camera


def init_render(*, engine: str, resolution: int, samples: int) -> None:
    """Configure render output, colour management and Cycles sampling.

    Args:
        engine: Render engine identifier assigned to ``scene.render.engine``.
        resolution: Side length in pixels of the square output image.
        samples: Cycles sample count; only used when ``engine`` is ``"CYCLES"``.
    """
    scene = bpy.context.scene

    # Output: square RGBA PNG on a transparent film, no post-processing.
    render = scene.render
    side = int(resolution)
    render.engine = engine
    render.resolution_x = side
    render.resolution_y = side
    render.resolution_percentage = 100
    render.image_settings.file_format = "PNG"
    render.image_settings.color_mode = "RGBA"
    render.film_transparent = True
    render.use_compositing = False
    render.use_sequencer = False
    render.dither_intensity = 0.0

    # Colour management.
    scene.display_settings.display_device = "sRGB"
    view = scene.view_settings
    view.view_transform = "Standard"
    if hasattr(view, "look"):
        try:
            view.look = "Medium High Contrast"
        except TypeError:
            # The look is not accepted by this build; keep the current one.
            pass
    view.exposure = 0.0
    view.gamma = 1.0

    if engine != "CYCLES":
        return

    cycles = scene.cycles
    cycles.device = "GPU"
    cycles.samples = int(samples)
    cycles.filter_type = "GAUSSIAN"
    cycles.filter_width = 1.5
    cycles.diffuse_bounces = 1
    cycles.glossy_bounces = 1
    cycles.transparent_max_bounces = 8
    cycles.transmission_bounces = 8
    # The Blender build available on HF Spaces is compiled without
    # OpenImageDenoise. Enabling Cycles denoising makes rendering abort
    # with "Build without OpenImageDenoiser", so keep it off here.
    cycles.use_denoising = False

    # Best effort: pick the first compute backend (in preference order) that
    # reports at least one device. Any failure leaves the preferences untouched.
    try:
        prefs = bpy.context.preferences.addons["cycles"].preferences
        prefs.get_devices()
        detected_types = {device.type for device in prefs.devices}
        backend = next(
            (
                candidate
                for candidate in ("CUDA", "OPTIX", "HIP", "ONEAPI", "METAL")
                if candidate in detected_types
            ),
            None,
        )
        if backend is not None:
            prefs.compute_device_type = backend
    except Exception:
        pass


def init_lighting() -> None:
    """Add five area lights aimed at the origin: one overhead, four on the sides.

    The two X-axis side lights use different energies (0.6x and 1.4x of the
    base energy); the two Y-axis lights both use the base energy.
    """

    def look_at_origin(light_obj: bpy.types.Object) -> None:
        """Rotate ``light_obj`` so its local -Z axis points at the world origin."""
        aim = Vector((0.0, 0.0, 0.0)) - Vector(light_obj.location)
        if aim.length == 0:
            # Already at the origin: there is no direction to aim along.
            return
        aim.normalize()
        reference_up = Vector((0.0, 0.0, 1.0))
        if abs(aim.dot(reference_up)) > 0.999:
            # Aiming (almost) straight along Z: use Y as the up hint instead.
            reference_up = Vector((0.0, 1.0, 0.0))
        side = aim.cross(reference_up)
        side.normalize()
        true_up = side.cross(aim)
        true_up.normalize()
        # Columns of the basis are (right, up, -aim).
        basis = Matrix(
            (
                (side.x, true_up.x, -aim.x),
                (side.y, true_up.y, -aim.y),
                (side.z, true_up.z, -aim.z),
            )
        ).to_4x4()
        light_obj.rotation_mode = "XYZ"
        # The zero-angle X rotation is an identity pre-multiply kept from the
        # original formulation.
        light_obj.rotation_euler = (Matrix.Rotation(0, 4, "X") @ basis).to_euler("XYZ")

    light_distance = 5.0
    light_height = 3.0
    light_energy_base = 1200.0
    side_light_size = 2.0
    # (name, location, energy, size) — the overhead light is created first.
    light_specs = (
        ("Top_Light", (0.0, 0.0, 8.0), 1500, 3.0),
        ("Light_X_Pos", (light_distance, 0.0, light_height), light_energy_base * 0.6, side_light_size),
        ("Light_X_Neg", (-light_distance, 0.0, light_height), light_energy_base * 1.4, side_light_size),
        ("Light_Y_Pos", (0.0, light_distance, light_height), light_energy_base, side_light_size),
        ("Light_Y_Neg", (0.0, -light_distance, light_height), light_energy_base, side_light_size),
    )
    for name, location, energy, size in light_specs:
        light_data = bpy.data.lights.new(name, type="AREA")
        light_obj = bpy.data.objects.new(name, light_data)
        bpy.context.collection.objects.link(light_obj)
        light_obj.data.energy = energy
        light_obj.location = location
        light_obj.data.size = size
        look_at_origin(light_obj)


def get_scene_root_objects() -> list[bpy.types.Object]:
    """Return every object in the current scene that has no parent."""
    scene_objects = bpy.context.scene.objects.values()
    return [candidate for candidate in scene_objects if not candidate.parent]


def get_scene_meshes() -> list[bpy.types.Object]:
    """Return every object in the current scene whose data block is a mesh."""
    scene_objects = bpy.context.scene.objects.values()
    return [candidate for candidate in scene_objects if isinstance(candidate.data, bpy.types.Mesh)]


def create_render_root() -> bpy.types.Object:
    """Create an empty and parent all top-level non-camera, non-light objects to it.

    Returns:
        The new ``AutoKinematicsRenderRoot`` empty, linked to the scene
        collection.
    """
    render_root = bpy.data.objects.new("AutoKinematicsRenderRoot", None)
    bpy.context.scene.collection.objects.link(render_root)

    # The freshly linked root is itself a parentless scene object, so it has
    # to be excluded from its own children.
    children = [
        candidate
        for candidate in bpy.context.scene.objects.values()
        if candidate.parent is None
        and candidate.type not in {"CAMERA", "LIGHT"}
        and candidate != render_root
    ]
    for child in children:
        child.parent = render_root
        child.matrix_parent_inverse = render_root.matrix_world.inverted()
    return render_root


def rotation_matrix_for_up(up_dir: str) -> Matrix:
    """Return the 4x4 rotation from ``UP_DIR_ROTATIONS`` for ``up_dir``.

    Raises:
        ValueError: Propagated from ``canonicalize_up_dir`` for unknown directions.
    """
    rotation_rows = UP_DIR_ROTATIONS[canonicalize_up_dir(up_dir)]
    return Matrix(rotation_rows).to_4x4()


def import_basis_to_blender(mesh_path: Path) -> Matrix:
    """Return the basis change assumed to have been applied when importing.

    glTF/GLB files get the ``+Y`` up-direction rotation; every other format
    gets the 4x4 identity.
    """
    is_gltf = mesh_path.suffix.lower().lstrip(".") in {"glb", "gltf"}
    return rotation_matrix_for_up("+Y") if is_gltf else Matrix.Identity(4)


def orient_and_normalize(
    root: bpy.types.Object,
    *,
    up_dir: str,
    import_basis: Matrix,
) -> None:
    """Rotate ``root`` upright, then centre and scale the scene to unit size.

    Args:
        root: Object whose world matrix is overwritten.
        up_dir: Source up direction that should end up along +Z.
        import_basis: Basis change applied at import time; it is undone first.
    """
    # Keep auto-kinematic renders in the same selected-upright frame as the
    # upright orientation picker: undo Blender's glTF import basis, then apply
    # the user-selected source-up -> +Z rotation.
    upright = rotation_matrix_for_up(up_dir) @ import_basis.inverted()
    root.matrix_world = upright
    bpy.context.view_layer.update()

    # Measure the rotated scene, then fold centring and scaling into the
    # root's final world matrix.
    lower, upper = scene_bbox()
    midpoint = (lower + upper) * 0.5
    size = upper - lower
    # The 1e-6 floor avoids dividing by zero for a zero-size bounding box.
    longest_side = max(float(size.x), float(size.y), float(size.z), 1e-6)
    uniform = 1.0 / longest_side
    root.matrix_world = (
        Matrix.Diagonal((uniform, uniform, uniform, 1.0))
        @ Matrix.Translation(-midpoint)
        @ upright
    )
    bpy.context.view_layer.update()


def scene_bbox() -> tuple[Vector, Vector]:
    """Return the world-space axis-aligned bounds of all mesh objects.

    Each mesh object's ``bound_box`` corners are transformed by its
    ``matrix_world`` and folded into a running per-axis minimum and maximum.

    Returns:
        ``(bbox_min, bbox_max)`` as two vectors.

    Raises:
        RuntimeError: If the scene contains no mesh objects.
    """
    mesh_objects = get_scene_meshes()
    if not mesh_objects:
        raise RuntimeError("No mesh objects found in the scene")

    lows = [math.inf] * 3
    highs = [-math.inf] * 3
    for mesh_obj in mesh_objects:
        for corner in mesh_obj.bound_box:
            world_corner = mesh_obj.matrix_world @ Vector(corner)
            for axis, value in enumerate(world_corner):
                if value < lows[axis]:
                    lows[axis] = value
                if value > highs[axis]:
                    highs[axis] = value
    return Vector(lows), Vector(highs)


def normalize_scene() -> None:
    """Scale the scene to a unit bounding box and centre it at the origin.

    When the scene has more than one parentless object, all of them (any
    parentless camera included) are parented to a new ``ParentEmpty`` so a
    single transform moves everything. The root object(s) are then scaled so
    the longest side of the meshes' world-space bounding box becomes 1, and
    translated so the box centre sits at the origin. Finally the object named
    ``"Camera"`` is detached from any parent again.

    Raises:
        RuntimeError: Propagated from ``scene_bbox`` when there are no meshes.
    """
    root_objects = get_scene_root_objects()
    if len(root_objects) > 1:
        parent_empty = bpy.data.objects.new("ParentEmpty", None)
        bpy.context.scene.collection.objects.link(parent_empty)
        for obj in root_objects:
            if obj != parent_empty:
                obj.parent = parent_empty

    bbox_min, bbox_max = scene_bbox()
    # Clamp the extent so a zero-size bounding box (e.g. a single-point mesh)
    # cannot cause a ZeroDivisionError; orient_and_normalize uses the same floor.
    max_extent = max(max(bbox_max - bbox_min), 1e-6)
    scale = 1.0 / max_extent
    for obj in get_scene_root_objects():
        obj.scale = obj.scale * scale

    # Re-measure after scaling so the centring offset is in the new units.
    bpy.context.view_layer.update()
    bbox_min, bbox_max = scene_bbox()
    offset = -(bbox_min + bbox_max) / 2
    for obj in get_scene_root_objects():
        obj.matrix_world.translation += offset

    bpy.ops.object.select_all(action="DESELECT")
    # The camera may have been swept under ParentEmpty above; release it.
    bpy.data.objects["Camera"].parent = None
    bpy.context.view_layer.update()


def configure_camera(camera: bpy.types.Object, *, resolution: int) -> np.ndarray:
    """Set up a 50 mm perspective camera and return its pinhole intrinsics.

    The camera data is configured with a 36 x 36 mm sensor and horizontal
    sensor fit.

    Args:
        camera: Camera object whose ``data`` settings are overwritten.
        resolution: Side length in pixels of the square render.

    Returns:
        A 3x3 ``float32`` matrix ``[[fx, 0, cx], [0, fy, cy], [0, 0, 1]]`` with
        the principal point at ``(resolution - 1) / 2`` on both axes.
    """
    optics = camera.data
    optics.type = "PERSP"
    optics.lens_unit = "MILLIMETERS"
    optics.sensor_width = 36.0
    optics.sensor_height = 36.0
    optics.sensor_fit = "HORIZONTAL"
    optics.lens = 50.0

    side = float(resolution)
    focal_x = side * float(optics.lens) / float(optics.sensor_width)
    focal_y = side * float(optics.lens) / float(optics.sensor_height)
    principal = 0.5 * (side - 1.0)

    intrinsic = np.eye(3, dtype=np.float32)
    intrinsic[0, 0] = focal_x
    intrinsic[1, 1] = focal_y
    intrinsic[0, 2] = principal
    intrinsic[1, 2] = principal
    return intrinsic


def set_camera_pose(
    camera: bpy.types.Object,
    *,
    camera_distance: float,
    azimuth_deg: float,
    pitch_deg: float,
) -> None:
    """Place ``camera`` on a sphere around the origin and aim it at the origin.

    Args:
        camera: Object whose location and Euler rotation are overwritten.
        camera_distance: Distance from the origin to the camera.
        azimuth_deg: Angle around +Z in degrees; 0 puts the camera on the +Y
            side and positive values move it towards +X.
        pitch_deg: Angle from the +Z axis in degrees (0 is directly above,
            90 is level with the origin).
    """
    azimuth_rad = math.radians(float(azimuth_deg))
    polar_rad = math.radians(float(pitch_deg))
    radius = float(camera_distance)
    ground_radius = radius * math.sin(polar_rad)
    camera.location = Vector(
        (
            ground_radius * math.sin(azimuth_rad),
            ground_radius * math.cos(azimuth_rad),
            radius * math.cos(polar_rad),
        )
    )

    view_dir = Vector((0.0, 0.0, 0.0)) - camera.location
    if view_dir.length == 0:
        # Camera sits on the target: fall back to looking straight down.
        view_dir = Vector((0.0, 0.0, -1.0))
    view_dir.normalize()

    world_up = Vector((0.0, 0.0, 1.0))
    # Looking (almost) straight along Z makes +Z useless as an up hint.
    nearly_vertical = abs(view_dir.dot(world_up)) > 0.999
    up_hint = Vector((0.0, 1.0, 0.0)) if nearly_vertical else Vector((0.0, 0.0, 1.0))
    side = view_dir.cross(up_hint)
    side.normalize()
    camera_up = side.cross(view_dir)
    camera_up.normalize()
    if camera_up.dot(world_up) < 0:
        # Keep the image upright; negating both axes preserves handedness.
        side = -side
        camera_up = -camera_up

    # Columns are (right, up, -forward): the camera looks down its local -Z.
    backward = -view_dir
    basis = Matrix(
        (
            (side.x, camera_up.x, backward.x),
            (side.y, camera_up.y, backward.y),
            (side.z, camera_up.z, backward.z),
        )
    )
    camera.rotation_mode = "XYZ"
    camera.rotation_euler = basis.to_euler("XYZ")


def get_world_to_camera(camera: bpy.types.Object) -> np.ndarray:
    """Return the 4x4 ``float32`` world-to-camera matrix of ``camera``.

    Only the location and rotation decomposed from ``camera.matrix_world`` are
    used; the scale component is discarded.
    """
    position, orientation = camera.matrix_world.decompose()[0:2]
    # Inverse of a rigid transform: R^T for the rotation, -R^T t for the offset.
    inverse_rotation = orientation.to_matrix().transposed()
    inverse_translation = -(inverse_rotation @ position)

    extrinsic = np.eye(4, dtype=np.float32)
    extrinsic[:3, :3] = [list(inverse_rotation[row]) for row in (0, 1, 2)]
    extrinsic[:3, 3] = [float(inverse_translation[row]) for row in (0, 1, 2)]
    return extrinsic


def rasterize_view(
    camera: bpy.types.Object,
    *,
    intrinsic: np.ndarray,
    resolution: int,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Ray-cast one ray per pixel from ``camera`` and collect per-pixel hits.

    Args:
        camera: Camera object whose ``matrix_world`` defines the ray origin
            and orientation.
        intrinsic: 3x3 pinhole matrix; ``fx``, ``fy``, ``cx`` and ``cy`` are
            read from it.
        resolution: Side length in pixels of the square raster.

    Returns:
        A tuple ``(camera_to_world, world_to_camera, face_ids, hit_points,
        normals, depth)``:

        * ``camera_to_world`` / ``world_to_camera``: 4x4 ``float32`` matrices.
        * ``face_ids``: ``(H, W)`` ``int32``; ``0`` where the ray hit
          something and ``-1`` otherwise. The face index reported by the ray
          cast is discarded, so this is effectively a hit mask.
        * ``hit_points``: ``(H, W, 3)`` ``float32`` hit locations, NaN on a miss.
        * ``normals``: ``(H, W, 3)`` ``float32`` hit normals (re-normalised
          when their length exceeds 1e-8), NaN on a miss.
        * ``depth``: ``(H, W)`` ``float32`` camera-space Z coordinate of the
          hit point, NaN on a miss.
    """
    camera_to_world = matrix_to_numpy(camera.matrix_world)
    world_to_camera = get_world_to_camera(camera)
    fx = float(intrinsic[0, 0])
    fy = float(intrinsic[1, 1])
    cx = float(intrinsic[0, 2])
    cy = float(intrinsic[1, 2])
    width = int(resolution)
    height = int(resolution)

    # Pixel grid with shape (height, width): grid_x varies along columns and
    # grid_y along rows.
    grid_x, grid_y = np.meshgrid(
        np.arange(width, dtype=np.float64),
        np.arange(height, dtype=np.float64),
    )
    dir_x = (grid_x - cx) / fx
    # Negated so that increasing row index points along the camera's -Y (down).
    dir_y = -(grid_y - cy) / fy
    directions_camera = np.stack(
        [dir_x, dir_y, np.ones_like(dir_x)],
        axis=-1,
    ).reshape(-1, 3)
    directions_camera /= np.linalg.norm(directions_camera, axis=-1, keepdims=True)

    # Camera axes in world space; the viewing direction is the negated third
    # column of the camera-to-world matrix.
    right = camera_to_world[:3, 0]
    up = camera_to_world[:3, 1]
    forward = -camera_to_world[:3, 2]
    directions_world = (
        directions_camera[:, 0:1] * right[None, :]
        + directions_camera[:, 1:2] * up[None, :]
        + directions_camera[:, 2:3] * forward[None, :]
    )
    # Re-normalise after the change of basis.
    directions_world /= np.linalg.norm(directions_world, axis=-1, keepdims=True)
    camera_origin = np.asarray(camera_to_world[:3, 3], dtype=np.float64)

    # Outputs start as "miss" sentinels (-1 / NaN) and are filled in per hit.
    flat_face_ids = np.full((height * width,), -1, dtype=np.int32)
    flat_hit_points = np.full((height * width, 3), np.nan, dtype=np.float32)
    flat_normals = np.full((height * width, 3), np.nan, dtype=np.float32)
    flat_depth = np.full((height * width,), np.nan, dtype=np.float32)

    depsgraph = bpy.context.evaluated_depsgraph_get()
    origin_vec = Vector(camera_origin.tolist())
    # One scene ray cast per pixel, i.e. height * width calls in total.
    for ray_idx, direction in enumerate(directions_world):
        hit, location, normal, _, _, _ = bpy.context.scene.ray_cast(
            depsgraph,
            origin_vec,
            Vector(direction.tolist()),
        )
        if not hit:
            continue
        # Only records that a hit happened; the face index is not stored.
        flat_face_ids[ray_idx] = 0
        hit_point = np.asarray((location.x, location.y, location.z), dtype=np.float32)
        hit_normal = np.asarray((normal.x, normal.y, normal.z), dtype=np.float32)
        normal_norm = float(np.linalg.norm(hit_normal))
        if normal_norm > 1e-8:
            hit_normal = hit_normal / normal_norm
        flat_hit_points[ray_idx] = hit_point
        flat_normals[ray_idx] = hit_normal
        # NOTE(review): this is the raw camera-space Z; since the camera looks
        # along its local -Z, visible points presumably get negative values
        # here — confirm that consumers expect this sign.
        flat_depth[ray_idx] = float((world_to_camera[:3, :3] @ hit_point.astype(np.float64) + world_to_camera[:3, 3])[2])

    return (
        camera_to_world,
        world_to_camera,
        flat_face_ids.reshape(height, width),
        flat_hit_points.reshape(height, width, 3),
        flat_normals.reshape(height, width, 3),
        flat_depth.reshape(height, width),
    )


def main() -> None:
    """Render every requested azimuth and save an image plus camera payload.

    For each azimuth this writes ``view_NNN.png`` (the Blender render) and
    ``view_NNN_camera.npz`` (intrinsics, extrinsics, per-pixel ray-cast rasters
    and the view angles) into the output directory.
    """
    args = parse_args()

    output_dir = args.output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    resolution = int(args.resolution)
    camera = reset_scene()
    init_render(
        engine=str(args.engine),
        resolution=resolution,
        samples=int(args.samples),
    )

    mesh_path = args.mesh_path.resolve()
    load_object(mesh_path)

    selected_up_dir = str(args.up_dir).strip()
    if not selected_up_dir:
        normalize_scene()
    else:
        root = create_render_root()
        orient_and_normalize(
            root,
            up_dir=canonicalize_up_dir(selected_up_dir),
            import_basis=import_basis_to_blender(mesh_path),
        )

    init_lighting()
    intrinsic = configure_camera(camera, resolution=resolution)

    # Per-pixel rasters are flipped only when no up direction was selected.
    flip_rasters = not selected_up_dir
    view_count = len(args.azimuths)
    for image_id, azimuth_deg in enumerate(args.azimuths):
        set_camera_pose(
            camera,
            camera_distance=float(args.camera_distance),
            azimuth_deg=float(azimuth_deg),
            pitch_deg=float(args.pitch_deg),
        )
        bpy.context.view_layer.update()

        image_path = output_dir / f"view_{image_id:03d}.png"
        camera_path = output_dir / f"view_{image_id:03d}_camera.npz"
        bpy.context.scene.render.filepath = str(image_path)
        bpy.ops.render.render(write_still=True)

        camera_to_world, world_to_camera, face_ids, hit_points, normals, depth = rasterize_view(
            camera,
            intrinsic=intrinsic,
            resolution=resolution,
        )
        np.savez_compressed(
            camera_path,
            intrinsic=intrinsic.astype(np.float32),
            camera_to_world=camera_to_world.astype(np.float32),
            world_to_camera=world_to_camera.astype(np.float32),
            face_ids=orient_saved_raster_payload(face_ids, flip=flip_rasters).astype(np.int32),
            hit_points=orient_saved_raster_payload(hit_points, flip=flip_rasters).astype(np.float32),
            normals=orient_saved_raster_payload(normals, flip=flip_rasters).astype(np.float32),
            depth=orient_saved_raster_payload(depth, flip=flip_rasters).astype(np.float32),
            azimuth_deg=np.float32(azimuth_deg),
            # Elevation is measured from the horizontal plane, pitch from +Z.
            elevation_deg=np.float32(90.0 - float(args.pitch_deg)),
            pitch_deg=np.float32(args.pitch_deg),
        )
        print(
            f"Rendered auto-kinematics Blender view {image_id + 1}/{view_count} "
            f"(azimuth={float(azimuth_deg):.1f}, pitch={float(args.pitch_deg):.1f})"
        )


# Entry point: run the render pipeline only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()