import numpy as np
import torch
import trimesh
import io
from PIL import Image


_triposr_cache = None


def get_triposr():
    global _triposr_cache
    if _triposr_cache is None:
        from transformers import TripoSRForImageTo3D, TripoSRImageProcessor
        processor = TripoSRImageProcessor.from_pretrained("stabilityai/TripoSR")
        model = TripoSRForImageTo3D.from_pretrained("stabilityai/TripoSR")
        model.eval()
        _triposr_cache = (model, processor)
    return _triposr_cache


def reconstruct_region(image: Image.Image, mask: list[list[bool]], bbox: list[int]) -> bytes:
    """
    Crop the masked region from the image, run TripoSR, return GLB bytes.
    """
    model, processor = get_triposr()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # Crop to bounding box with 20% padding
    x, y, w, h = bbox
    pad_x = int(w * 0.20)
    pad_y = int(h * 0.20)
    W, H = image.size
    x0 = max(0, x - pad_x)
    y0 = max(0, y - pad_y)
    x1 = min(W, x + w + pad_x)
    y1 = min(H, y + h + pad_y)
    cropped = image.crop((x0, y0, x1, y1)).resize((512, 512), Image.LANCZOS)

    # Apply mask as alpha channel so TripoSR focuses on the region
    mask_arr = np.array(mask, dtype=np.uint8)[y0:y1, x0:x1]
    mask_resized = np.array(
        Image.fromarray(mask_arr * 255).resize((512, 512), Image.NEAREST)
    )
    rgba = np.array(cropped.convert("RGBA"))
    rgba[:, :, 3] = mask_resized
    input_img = Image.fromarray(rgba)

    inputs = processor(images=input_img, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    # Export as GLB via trimesh
    mesh_data = outputs.mesh  # TripoSR returns a trimesh-compatible object
    if hasattr(mesh_data, "export"):
        glb_bytes = mesh_data.export(file_type="glb")
    else:
        # Fallback: build trimesh from vertices/faces tensors
        verts = mesh_data.verts_list()[0].cpu().numpy()
        faces = mesh_data.faces_list()[0].cpu().numpy()
        mesh = trimesh.Trimesh(vertices=verts, faces=faces, process=False)
        buf = io.BytesIO()
        mesh.export(buf, file_type="glb")
        glb_bytes = buf.getvalue()

    return glb_bytes


def depth_to_mesh(depth: list[list[float]], mask: list[list[bool]], image: Image.Image) -> bytes:
    """
    Fallback when TripoSR isn't available: lift depth map into a 3D mesh
    constrained to the masked region, textured with the source image.
    """
    depth_arr = np.array(depth, dtype=np.float32)
    mask_arr = np.array(mask, dtype=bool)
    H, W = depth_arr.shape

    # Normalize depth to [0, 1] then scale to reasonable Z range
    dmin, dmax = depth_arr.min(), depth_arr.max()
    if dmax > dmin:
        depth_norm = (depth_arr - dmin) / (dmax - dmin)
    else:
        depth_norm = np.zeros_like(depth_arr)
    depth_scaled = depth_norm * 0.5  # 0.5 units of Z range

    # Build vertex grid only for masked pixels
    ys, xs = np.where(mask_arr)
    if len(xs) == 0:
        # Empty mask — return a flat quad
        verts = np.array([[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0]], dtype=np.float32)
        faces = np.array([[0, 1, 2], [0, 2, 3]])
        mesh = trimesh.Trimesh(vertices=verts, faces=faces)
        buf = io.BytesIO()
        mesh.export(buf, file_type="glb")
        return buf.getvalue()

    # Normalize to [-0.5, 0.5] XY space
    x_norm = (xs / W) - 0.5
    y_norm = 0.5 - (ys / H)
    z_vals = depth_scaled[ys, xs]
    vertices = np.stack([x_norm, y_norm, z_vals], axis=1).astype(np.float32)

    # UV = source pixel position
    uvs = np.stack([xs / W, 1.0 - ys / H], axis=1).astype(np.float32)

    # Triangulate the masked grid using Delaunay
    from scipy.spatial import Delaunay
    points_2d = np.stack([x_norm, y_norm], axis=1)
    tri = Delaunay(points_2d)
    faces = tri.simplices.astype(np.int32)

    # Build mesh with texture
    img_arr = np.array(image.convert("RGB"))
    texture = trimesh.visual.texture.TextureVisuals(
        uv=uvs,
        image=Image.fromarray(img_arr),
    )
    mesh = trimesh.Trimesh(vertices=vertices, faces=faces, visual=texture, process=False)
    buf = io.BytesIO()
    mesh.export(buf, file_type="glb")
    return buf.getvalue()