# Multi-HMR
# Copyright (c) 2024-present NAVER Corp.
# CC BY-NC-SA 4.0 license

import torch
import numpy as np
import trimesh
import math
from scipy.spatial.transform import Rotation
from PIL import ImageFont, ImageDraw, Image

OPENCV_TO_OPENGL_CAMERA_CONVENTION = np.array(
    [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
)


def geotrf(Trf, pts, ncol=None, norm=False):
    """Apply a geometric transformation to a list of 3-D points.
    H: 3x3 or 4x4 projection matrix (typically a Homography)
    p: numpy/torch/tuple of coordinates. Shape must be (...,2) or (...,3)

    ncol: int. number of columns of the result (2 or 3)
    norm: float. if != 0, the resut is projected on the z=norm plane.

    Returns an array of projected 2d points.
    """
    assert Trf.ndim in (2, 3)
    if isinstance(Trf, np.ndarray):
        pts = np.asarray(pts)
    elif isinstance(Trf, torch.Tensor):
        pts = torch.as_tensor(pts, dtype=Trf.dtype)

    ncol = ncol or pts.shape[-1]

    # adapt shape if necessary
    output_reshape = pts.shape[:-1]
    if Trf.ndim == 3:
        assert len(Trf) == len(pts), "batch size does not match"
    if Trf.ndim == 3 and pts.ndim > 3:
        # Trf == (B,d,d) & pts == (B,H,W,d) --> (B, H*W, d)
        pts = pts.reshape(pts.shape[0], -1, pts.shape[-1])
    elif Trf.ndim == 3 and pts.ndim == 2:
        # Trf == (B,d,d) & pts == (B,d) --> (B, 1, d)
        pts = pts[:, None, :]

    if pts.shape[-1] + 1 == Trf.shape[-1]:
        Trf = Trf.swapaxes(-1, -2)  # transpose Trf
        pts = pts @ Trf[..., :-1, :] + Trf[..., -1:, :]
    elif pts.shape[-1] == Trf.shape[-1]:
        Trf = Trf.swapaxes(-1, -2)  # transpose Trf
        pts = pts @ Trf
    else:
        pts = Trf @ pts.T
        if pts.ndim >= 2:
            pts = pts.swapaxes(-1, -2)
    if norm:
        pts = pts / pts[..., -1:]  # DONT DO /= BECAUSE OF WEIRD PYTORCH BUG
        if norm != 1:
            pts *= norm

    return pts[..., :ncol].reshape(*output_reshape, ncol)


def create_scene(
    img_pil,
    l_mesh,
    l_face,
    color=None,
    metallicFactor=0.0,
    roughnessFactor=0.5,
    focal=600,
):

    scene = trimesh.Scene(lights=trimesh.scene.lighting.Light(intensity=3.0))

    # Human meshes
    for i, mesh in enumerate(l_mesh):
        if color is None:
            _color = (
                np.random.choice(range(1, 225)) / 255,
                np.random.choice(range(1, 225)) / 255,
                np.random.choice(range(1, 225)) / 255,
            )
        else:
            if isinstance(color, list):
                _color = color[i]
            elif isinstance(color, tuple):
                _color = color
            else:
                raise NotImplementedError
        mesh = trimesh.Trimesh(mesh, l_face[i])
        mesh.visual = trimesh.visual.TextureVisuals(
            uv=None,
            material=trimesh.visual.material.PBRMaterial(
                metallicFactor=metallicFactor,
                roughnessFactor=roughnessFactor,
                alphaMode="OPAQUE",
                baseColorFactor=(_color[0], _color[1], _color[2], 1.0),
            ),
            image=None,
            face_materials=None,
        )
        scene.add_geometry(mesh)

    # Image
    H, W = img_pil.size[0], img_pil.size[1]
    screen_width = 0.3
    height = focal * screen_width / H
    width = screen_width * 0.5**0.5
    rot45 = np.eye(4)
    rot45[:3, :3] = Rotation.from_euler("z", np.deg2rad(45)).as_matrix()
    rot45[2, 3] = -height  # set the tip of the cone = optical center
    aspect_ratio = np.eye(4)
    aspect_ratio[0, 0] = W / H
    transform = OPENCV_TO_OPENGL_CAMERA_CONVENTION @ aspect_ratio @ rot45
    cam = trimesh.creation.cone(width, height, sections=4, transform=transform)
    # cam.apply_transform(transform)
    # import ipdb
    # ipdb.set_trace()

    # vertices = geotrf(transform, cam.vertices[[4,5,1,3]])
    vertices = cam.vertices[[4, 5, 1, 3]]
    faces = np.array([[0, 1, 2], [0, 2, 3], [2, 1, 0], [3, 2, 0]])
    img = trimesh.Trimesh(vertices=vertices, faces=faces)
    uv_coords = np.float32([[0, 0], [1, 0], [1, 1], [0, 1]])
    # img_pil = Image.fromarray((255. * np.ones((20,20,3))).astype(np.uint8)) # white only!
    material = trimesh.visual.texture.SimpleMaterial(
        image=img_pil,
        diffuse=[255, 255, 255, 0],
        ambient=[255, 255, 255, 0],
        specular=[255, 255, 255, 0],
        glossiness=1.0,
    )
    img.visual = trimesh.visual.TextureVisuals(
        uv=uv_coords, image=img_pil
    )  # , material=material)
    # _main_color = [255,255,255,0]
    # print(img.visual.material.ambient)
    # print(img.visual.material.diffuse)
    # print(img.visual.material.specular)
    # print(img.visual.material.main_color)

    # img.visual.material.ambient = _main_color
    # img.visual.material.diffuse = _main_color
    # img.visual.material.specular = _main_color

    # img.visual.material.main_color = _main_color
    # img.visual.material.glossiness = _main_color
    scene.add_geometry(img)

    # this is the camera mesh
    rot2 = np.eye(4)
    rot2[:3, :3] = Rotation.from_euler("z", np.deg2rad(2)).as_matrix()
    # import ipdb
    # ipdb.set_trace()
    # vertices = cam.vertices
    # print(rot2)
    vertices = np.r_[cam.vertices, 0.95 * cam.vertices, geotrf(rot2, cam.vertices)]
    # vertices = np.r_[cam.vertices, 0.95*cam.vertices, 1.05*cam.vertices]
    faces = []
    for face in cam.faces:
        if 0 in face:
            continue
        a, b, c = face
        a2, b2, c2 = face + len(cam.vertices)
        a3, b3, c3 = face + 2 * len(cam.vertices)

        # add 3 pseudo-edges
        faces.append((a, b, b2))
        faces.append((a, a2, c))
        faces.append((c2, b, c))

        faces.append((a, b, b3))
        faces.append((a, a3, c))
        faces.append((c3, b, c))

    # no culling
    faces += [(c, b, a) for a, b, c in faces]

    cam = trimesh.Trimesh(vertices=vertices, faces=faces)
    cam.visual.face_colors[:, :3] = (255, 0, 0)
    scene.add_geometry(cam)

    # OpenCV to OpenGL
    rot = np.eye(4)
    cams2world = np.eye(4)
    rot[:3, :3] = Rotation.from_euler("y", np.deg2rad(180)).as_matrix()
    scene.apply_transform(
        np.linalg.inv(cams2world @ OPENCV_TO_OPENGL_CAMERA_CONVENTION @ rot)
    )

    return scene


def length(v):
    return math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2])


def cross(v0, v1):
    return [
        v0[1] * v1[2] - v1[1] * v0[2],
        v0[2] * v1[0] - v1[2] * v0[0],
        v0[0] * v1[1] - v1[0] * v0[1],
    ]


def dot(v0, v1):
    return v0[0] * v1[0] + v0[1] * v1[1] + v0[2] * v1[2]


def normalize(v, eps=1e-13):
    l = length(v)
    return [v[0] / (l + eps), v[1] / (l + eps), v[2] / (l + eps)]


def lookAt(eye, target, *args, **kwargs):
    """
    eye is the point of view, target is the point which is looked at and up is the upwards direction.

    Input should be in OpenCV format - we transform arguments to OpenGL
    Do compute in OpenGL and then transform back to OpenCV

    """
    # Transform from OpenCV to OpenGL format
    # eye = [eye[0], -eye[1], -eye[2]]
    # target = [target[0], -target[1], -target[2]]
    up = [0, -1, 0]

    eye, at, up = eye, target, up
    zaxis = normalize((at[0] - eye[0], at[1] - eye[1], at[2] - eye[2]))
    xaxis = normalize(cross(zaxis, up))
    yaxis = cross(xaxis, zaxis)

    zaxis = [-zaxis[0], -zaxis[1], -zaxis[2]]

    viewMatrix = np.asarray(
        [
            [xaxis[0], xaxis[1], xaxis[2], -dot(xaxis, eye)],
            [yaxis[0], yaxis[1], yaxis[2], -dot(yaxis, eye)],
            [zaxis[0], zaxis[1], zaxis[2], -dot(zaxis, eye)],
            [0, 0, 0, 1],
        ]
    ).reshape(4, 4)

    # OpenGL to OpenCV
    viewMatrix = OPENCV_TO_OPENGL_CAMERA_CONVENTION @ viewMatrix

    return viewMatrix


def print_distance_on_image(pred_rend_array, humans, _color):
    # Add distance to the image.
    font = ImageFont.load_default()
    rend_pil = Image.fromarray(pred_rend_array)
    draw = ImageDraw.Draw(rend_pil)
    for i_hum, hum in enumerate(humans):
        # distance
        transl = hum["transl_pelvis"].cpu().numpy().reshape(3)
        dist_cam = np.sqrt(((transl[[0, 2]]) ** 2).sum())  # discarding Y axis
        # 2d - bbox
        bbox = get_bbox(
            hum["j2d_smplx"].cpu().numpy(), factor=1.35, output_format="x1y1x2y2"
        )
        loc = [(bbox[0] + bbox[2]) / 2.0, bbox[1]]
        txt = f"{dist_cam:.2f}m"
        length = font.getlength(txt)
        loc[0] = loc[0] - length // 2
        fill = tuple((np.asarray(_color[i_hum]) * 255).astype(np.int32).tolist())
        draw.text((loc[0], loc[1]), txt, fill=fill, font=font)
    return np.asarray(rend_pil)


def get_bbox(points, factor=1.0, output_format="xywh"):
    """
    Args:
        - y: [k,2]
    Return:
        - bbox: [4] in a specific format
    """
    assert (
        len(points.shape) == 2
    ), f"Wrong shape, expected two-dimensional array. Got shape {points.shape}"
    assert points.shape[1] == 2
    x1, x2 = points[:, 0].min(), points[:, 0].max()
    y1, y2 = points[:, 1].min(), points[:, 1].max()
    cx, cy = (x2 + x1) / 2.0, (y2 + y1) / 2.0
    sx, sy = np.abs(x2 - x1), np.abs(y2 - y1)
    sx, sy = int(factor * sx), int(factor * sy)
    x1, y1 = int(cx - sx / 2.0), int(cy - sy / 2.0)
    x2, y2 = int(cx + sx / 2.0), int(cy + sy / 2.0)
    if output_format == "xywh":
        return [x1, y1, sx, sy]
    elif output_format == "x1y1x2y2":
        return [x1, y1, x2, y2]
    else:
        raise NotImplementedError