"""
TEXTure CPU Lite - Text-Guided 3D Texturing
Single-file implementation with CPU renderer and xatlas UV unwrapping.
"""
import os
import copy
import tempfile
import shutil
import zipfile
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import trimesh
import gradio as gr
from PIL import Image
from pathlib import Path
from typing import Optional, Dict, Any, Tuple
from dataclasses import dataclass

# =============================================================================
# CONFIGURATION
# =============================================================================
# Hugging Face model id handed to StableDiffusionDepth2ImgPipeline.from_pretrained.
SD_MODEL = "radames/stable-diffusion-2-depth-img2img"  # Public copy of SD-2-Depth (original is gated)
# Run diffusion on CUDA when torch can see a GPU, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision on GPU, full precision on CPU.
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
# Default view count; the UI "Views" slider starts at the same value.
NUM_VIEWS = 4
# Side length (pixels) of each rendered view; also the fallback size in TexturedMeshModel.render.
RENDER_SIZE = 512
# Side length (texels) of the texture created by generate_texture.
TEXTURE_RESOLUTION = 1024
# Fallback step count used by generate_texture when no value is supplied.
NUM_INFERENCE_STEPS = 20  # Full quality (slower on CPU but better results)

# =============================================================================
# MESH CLASS (replaces Kaolin)
# =============================================================================
class Mesh:
    """CPU-compatible triangle mesh loaded through trimesh.

    Attributes:
        vertices: float32 tensor of vertex positions, one row per vertex.
        faces: int64 tensor of vertex indices, one row per triangle.
        normals: unit face normals, one row per triangle.
        face_area: area of each triangle.
        vt: per-vertex UV coordinates, or None when the file carries none.
        ft: per-face UV indices (a copy of ``faces``), or None without UVs.
    """

    def __init__(self, obj_path: str, device: str = "cpu"):
        """Load ``obj_path`` as a single mesh and place its data on ``device``.

        Raises:
            ValueError: if trimesh does not return a ``Trimesh`` instance.
        """
        mesh = trimesh.load(obj_path, force='mesh', process=False)
        if not isinstance(mesh, trimesh.Trimesh):
            raise ValueError(f"Failed to load mesh from {obj_path}")

        self.vertices = torch.tensor(mesh.vertices, dtype=torch.float32, device=device)
        self.faces = torch.tensor(mesh.faces, dtype=torch.long, device=device)
        self.normals, self.face_area = self._calc_normals(self.vertices, self.faces)
        self.ft = None
        self.vt = None

        # Not every trimesh visual exposes ``uv`` (e.g. colour-only visuals).
        uv = getattr(mesh.visual, 'uv', None)
        if uv is not None and len(uv) > 0:
            self.vt = torch.tensor(uv, dtype=torch.float32, device=device)
            # UVs are stored per vertex here, so the UV faces mirror the geometry faces.
            self.ft = self.faces.clone()

    @staticmethod
    def _calc_normals(vertices, faces):
        """Return ``(unit_normals, areas)`` for every triangle in ``faces``."""
        v0, v1, v2 = vertices[faces[:, 0]], vertices[faces[:, 1]], vertices[faces[:, 2]]
        n = torch.cross(v1 - v0, v2 - v0, dim=-1)
        # The cross product's length is twice the triangle area.
        area = torch.norm(n, dim=-1)
        n = n / (area[:, None] + 1e-8)
        return n, area / 2

    def normalize_mesh(self, inplace=False, target_scale=1.0, dy=0.0):
        """Centre the mesh, scale it into a sphere of radius ``target_scale``, then shift it by ``dy`` along Y.

        Args:
            inplace: modify and return this instance instead of a deep copy.
            target_scale: distance of the farthest vertex from the centroid after scaling.
            dy: offset added to every Y coordinate after scaling.

        Returns:
            The normalized mesh (``self`` when ``inplace`` is true).
        """
        mesh = self if inplace else copy.deepcopy(self)
        verts = mesh.vertices - mesh.vertices.mean(dim=0)
        scale = torch.max(torch.norm(verts, p=2, dim=1))
        verts = verts / (scale + 1e-8) * target_scale
        verts[:, 1] = verts[:, 1] + dy
        mesh.vertices = verts
        # Scaling changes every triangle's area, so refresh the derived
        # quantities instead of leaving the values computed at load time.
        mesh.normals, mesh.face_area = self._calc_normals(mesh.vertices, mesh.faces)
        return mesh

# =============================================================================
# RENDERER (replaces Kaolin render functions)
# =============================================================================
def perspective_projection(fov=np.pi/3, aspect=1.0, near=0.1, far=100.0):
    """Build a 4x4 perspective projection matrix.

    Args:
        fov: vertical field of view in radians.
        aspect: width / height ratio of the viewport.
        near: distance to the near clipping plane.
        far: distance to the far clipping plane.

    Returns:
        A (4, 4) tensor whose bottom row is ``[0, 0, -1, 0]``, so clip-space
        ``w`` equals the negated camera-space ``z``.
    """
    focal = 1.0 / np.tan(fov / 2)
    entries = {
        (0, 0): focal / aspect,
        (1, 1): focal,
        (2, 2): (far + near) / (near - far),
        (2, 3): (2 * far * near) / (near - far),
        (3, 2): -1.0,
    }

    matrix = torch.zeros(4, 4)
    for (row, col), value in entries.items():
        matrix[row, col] = value
    return matrix

def view_matrix(pos, look_at, up):
    """Build a look-at view matrix.

    Args:
        pos: camera position (extra singleton dimensions are squeezed away).
        look_at: point the camera is aimed at.
        up: approximate up direction used to orient the camera.

    Returns:
        A (1, 4, 4) tensor mapping world coordinates to camera coordinates;
        the camera looks along its own negative Z axis.
    """
    eye, target, up_hint = pos.squeeze(), look_at.squeeze(), up.squeeze()

    # Orthonormal camera basis: forward, then right, then the corrected up.
    forward = (target - eye) / (torch.norm(target - eye) + 1e-8)
    right = torch.linalg.cross(forward, up_hint)
    right = right / (torch.norm(right) + 1e-8)
    new_up = torch.linalg.cross(right, forward)

    view = torch.eye(4)
    # Rotation rows, with the third row flipped so the camera faces -Z.
    view[:3, :3] = torch.stack([right, new_up, -forward])
    # Translation: the eye position expressed in the rotated basis, negated.
    view[:3, 3] = torch.stack([
        -torch.dot(right, eye),
        -torch.dot(new_up, eye),
        torch.dot(forward, eye),
    ])
    return view.unsqueeze(0)

def camera_from_angles(elev, azim, r=3.0, look_at_height=0.0):
    """Place a camera on a sphere of radius ``r`` and aim it at the Y axis.

    Args:
        elev: polar angle in radians measured from the +Y axis (tensor).
        azim: azimuth in radians around the Y axis (tensor).
        r: distance of the camera from the origin.
        look_at_height: Y coordinate of the point the camera looks at.

    Returns:
        The (1, 4, 4) view matrix produced by ``view_matrix``.
    """
    # Radius of the horizontal circle the camera sits on at this polar angle.
    planar = r * torch.sin(elev)
    eye = torch.tensor([[
        planar * torch.sin(azim),
        r * torch.cos(elev),
        planar * torch.cos(azim),
    ]])

    target = torch.zeros_like(eye)
    target[:, 1] = look_at_height

    world_up = torch.tensor([[0.0, 1.0, 0.0]])
    return view_matrix(eye, target, world_up)

def prepare_vertices(vertices, faces, proj, view):
    """Transform per-face vertices into camera and image space.

    Args:
        vertices: (V, 3) vertex positions.
        faces: (F, 3) vertex indices.
        proj: (4, 4) projection matrix.
        view: (1, 4, 4) view matrix.

    Returns:
        Tuple of
        - (1, F, 3, 4) homogeneous camera-space face vertices,
        - (1, F, 3, 2) normalized-device XY coordinates,
        - (1, F, 3) unit face normals computed from the untransformed vertices.
    """
    device = vertices.device
    tris = vertices[faces.long()]

    # Append w = 1 so the 4x4 matrices can be applied directly.
    homog = torch.cat([tris, torch.ones(*tris.shape[:-1], 1, device=device)], dim=-1)

    to_camera = view.squeeze(0).to(device)
    to_clip = proj.to(device)
    cam = torch.einsum('ij,fvj->fvi', to_camera, homog)
    clip = torch.einsum('ij,fvj->fvi', to_clip, cam)

    # Perspective divide; w is clamped so it can never be zero or negative.
    ndc = clip[..., :3] / clip[..., 3:4].clamp(min=1e-8)
    img = ndc[..., :2]

    edge_a = tris[:, 1] - tris[:, 0]
    edge_b = tris[:, 2] - tris[:, 0]
    normals = torch.cross(edge_a, edge_b, dim=-1)
    normals = normals / (torch.norm(normals, dim=-1, keepdim=True) + 1e-8)

    return cam.unsqueeze(0), img.unsqueeze(0), normals.unsqueeze(0)

def rasterize(width, height, face_z, face_verts_img, face_attrs):
    """Software z-buffer rasterizer with barycentric attribute interpolation.

    Args:
        width: output width in pixels.
        height: output height in pixels.
        face_z: (1, F, 3) per-vertex depth; the SMALLEST value wins a pixel.
        face_verts_img: (1, F, 3, 2) per-vertex XY in normalized device
            coordinates (x right, y up, both in [-1, 1] when on screen).
        face_attrs: (1, F, 3, A) per-vertex attributes to interpolate.

    Returns:
        Tuple of
        - features: (1, height, width, A) interpolated attributes, zero where
          no face covers the pixel,
        - face_idx: (1, height, width, 1) index of the winning face, -1 for
          uncovered pixels.
    """
    device = face_verts_img.device
    num_faces = face_verts_img.shape[1]
    num_attrs = face_attrs.shape[-1]

    features = torch.zeros(1, height, width, num_attrs, device=device)
    face_idx = torch.full((1, height, width, 1), -1, dtype=torch.long, device=device)
    depth_buf = torch.full((1, height, width), float('inf'), device=device)

    # NDC -> pixel coordinates (image Y grows downwards).
    verts_pix = face_verts_img.clone()
    verts_pix[..., 0] = (verts_pix[..., 0] + 1) * 0.5 * width
    verts_pix[..., 1] = (1 - verts_pix[..., 1]) * 0.5 * height

    def edge_fn(va, vb, p):
        # Signed area term of the edge va->vb evaluated at p.
        return (p[..., 0] - va[0]) * (vb[1] - va[1]) - (p[..., 1] - va[1]) * (vb[0] - va[0])

    # Bounding boxes for all faces at once; avoids four tensor round-trips per face.
    xs, ys = verts_pix[0, ..., 0], verts_pix[0, ..., 1]
    x_lo = xs.min(dim=1).values.floor().tolist()
    x_hi = xs.max(dim=1).values.ceil().tolist()
    y_lo = ys.min(dim=1).values.floor().tolist()
    y_hi = ys.max(dim=1).values.ceil().tolist()

    for f in range(num_faces):
        min_x = max(0, int(x_lo[f]))
        max_x = min(width - 1, int(x_hi[f]))
        min_y = max(0, int(y_lo[f]))
        max_y = min(height - 1, int(y_hi[f]))
        if min_x > max_x or min_y > max_y:
            continue  # entirely off screen

        v0, v1, v2 = verts_pix[0, f, 0], verts_pix[0, f, 1], verts_pix[0, f, 2]
        area = edge_fn(v0, v1, v2)
        if abs(area.item()) < 1e-8:
            continue  # degenerate (zero-area) triangle

        # Pixel centres inside the clipped bounding box.
        px = torch.arange(min_x, max_x + 1, device=device).float() + 0.5
        py = torch.arange(min_y, max_y + 1, device=device).float() + 0.5
        px_grid, py_grid = torch.meshgrid(px, py, indexing='xy')
        points = torch.stack([px_grid.flatten(), py_grid.flatten()], dim=-1)

        # Barycentric weights; dividing by the signed area makes the inside
        # test independent of the triangle's winding.
        w0 = edge_fn(v1, v2, points) / area
        w1 = edge_fn(v2, v0, points) / area
        w2 = edge_fn(v0, v1, points) / area

        inside = (w0 >= 0) & (w1 >= 0) & (w2 >= 0)
        if not inside.any():
            continue

        pix_x = points[inside, 0].long()
        pix_y = points[inside, 1].long()
        iw0, iw1, iw2 = w0[inside], w1[inside], w2[inside]

        z0, z1, z2 = face_z[0, f, 0], face_z[0, f, 1], face_z[0, f, 2]
        interp_z = iw0 * z0 + iw1 * z1 + iw2 * z2

        # Every pixel occurs at most once per face, so the depth test can be
        # applied to the whole face in one vectorized step.
        closer = interp_z < depth_buf[0, pix_y, pix_x]
        if not closer.any():
            continue

        pix_x, pix_y = pix_x[closer], pix_y[closer]
        iw0, iw1, iw2 = iw0[closer], iw1[closer], iw2[closer]
        a0, a1, a2 = face_attrs[0, f, 0], face_attrs[0, f, 1], face_attrs[0, f, 2]
        interp_attr = iw0.unsqueeze(-1) * a0 + iw1.unsqueeze(-1) * a1 + iw2.unsqueeze(-1) * a2

        depth_buf[0, pix_y, pix_x] = interp_z[closer].to(depth_buf.dtype)
        features[0, pix_y, pix_x] = interp_attr.to(features.dtype)
        face_idx[0, pix_y, pix_x, 0] = f

    return features, face_idx

def texture_sample(uv, texture, mode='bilinear'):
    """Look up ``texture`` at the given UV coordinates.

    Args:
        uv: (N, H, W, 2) UV coordinates in [0, 1]; V = 1 maps to the top image row.
        texture: (N, C, TH, TW) texture image.
        mode: interpolation mode forwarded to ``F.grid_sample``.

    Returns:
        (N, 1, H, W, C) sampled colours. The input ``uv`` is not modified.
    """
    # grid_sample expects coordinates in [-1, 1] with Y pointing down.
    grid_x = uv[..., 0] * 2 - 1
    grid_y = (1 - uv[..., 1]) * 2 - 1
    grid = torch.stack([grid_x, grid_y], dim=-1)

    colours = F.grid_sample(texture, grid, mode=mode, padding_mode='border', align_corners=False)
    channels_last = colours.permute(0, 2, 3, 1)
    return channels_last.unsqueeze(1)

# =============================================================================
# TEXTURED MESH MODEL
# =============================================================================
@dataclass
class MeshConfig:
    """Settings consumed by ``TexturedMeshModel``."""
    # Path of the mesh file to load.
    shape_path: str = 'shapes/bunny.obj'
    # Target scale handed to Mesh.normalize_mesh.
    shape_scale: float = 0.6
    # Y offset applied after normalization; also used as the camera look-at height.
    dy: float = 0.25
    # Side length (texels) of the square texture image.
    texture_resolution: int = 512

class TexturedMeshModel(nn.Module):
    """A normalized mesh with a UV atlas and a learnable RGB texture.

    Attributes:
        mesh: the normalized ``Mesh``.
        proj: 4x4 perspective projection with a 60 degree vertical field of view.
        texture_img: (1, 3, R, R) texture stored as an ``nn.Parameter``,
            initialised to white.
        vt, ft: UV coordinates and per-face UV indices.
        face_attrs: (1, F, 3, 2) UV coordinates gathered per face vertex.
    """

    def __init__(self, config: MeshConfig, render_size=256, cache_path=None, device='cpu'):
        """Load the mesh, create the texture and build (or load) the UV atlas.

        Args:
            config: mesh path, scale, vertical offset and texture resolution.
            render_size: accepted for interface compatibility; ``render``
                falls back to the module-level ``RENDER_SIZE`` instead.
            cache_path: optional directory used to cache the UV atlas.
            device: torch device for mesh and texture tensors.
        """
        super().__init__()
        self.device = device
        self.config = config
        self.dy = config.dy
        self.mesh_scale = config.shape_scale
        self.texture_res = config.texture_resolution
        self.cache_path = cache_path

        self.proj = perspective_projection(np.pi / 3)
        self.mesh = Mesh(config.shape_path, device).normalize_mesh(True, config.shape_scale, config.dy)

        texture = torch.ones(1, 3, self.texture_res, self.texture_res, device=device)
        self.texture_img = nn.Parameter(texture)

        self.vt, self.ft = self._init_uv()
        self.face_attrs = self.vt.unsqueeze(0)[:, self.ft.long()]

    def _init_uv(self):
        """Return ``(vt, ft)``, trying the cache, then mesh UVs, then xatlas.

        A freshly generated xatlas atlas is written to ``cache_path`` when one
        is configured.
        """
        if self.cache_path:
            vt_path = Path(self.cache_path) / 'vt.pth'
            ft_path = Path(self.cache_path) / 'ft.pth'
            if vt_path.exists() and ft_path.exists():
                return torch.load(vt_path).to(self.device), torch.load(ft_path).to(self.device)

        if self.mesh.vt is not None and self.mesh.vt.shape[0] > 0:
            return self.mesh.vt.to(self.device), self.mesh.ft.to(self.device)

        # Imported lazily so xatlas is only required for meshes without UVs.
        import xatlas
        v_np = self.mesh.vertices.cpu().numpy()
        f_np = self.mesh.faces.int().cpu().numpy()

        atlas = xatlas.Atlas()
        atlas.add_mesh(v_np, f_np)
        opts = xatlas.ChartOptions()
        opts.max_iterations = 4
        atlas.generate(chart_options=opts)

        _, ft_np, vt_np = atlas[0]
        vt = torch.from_numpy(vt_np.astype(np.float32)).to(self.device)
        ft = torch.from_numpy(ft_np.astype(np.int64)).to(self.device)

        if self.cache_path:
            os.makedirs(self.cache_path, exist_ok=True)
            torch.save(vt.cpu(), Path(self.cache_path) / 'vt.pth')
            torch.save(ft.cpu(), Path(self.cache_path) / 'ft.pth')

        return vt, ft

    def render(self, theta, phi, radius, dims=None):
        """Render the textured mesh from a camera on a sphere.

        Args:
            theta: polar angle in radians, measured from the +Y axis.
            phi: azimuth in radians around the Y axis.
            radius: camera distance from the origin.
            dims: ``(height, width)`` of the output; defaults to
                ``(RENDER_SIZE, RENDER_SIZE)``.

        Returns:
            Dict with
            - ``image``: (1, 3, H, W) rendering on a white background,
            - ``mask``: (1, 1, H, W) coverage mask (1 where a face is visible),
            - ``depth``: (1, 1, H, W) depth in [0.5, 1] on covered pixels
              (1 = nearest) and 0 on the background,
            - ``render_cache``: the raw ``uv_features`` and ``face_idx`` buffers.
        """
        dims = dims or (RENDER_SIZE, RENDER_SIZE)
        cam = camera_from_angles(torch.tensor(theta), torch.tensor(phi), radius, self.dy)

        verts_cam, verts_img, _ = prepare_vertices(
            self.mesh.vertices, self.mesh.faces, self.proj, cam)

        # verts_cam is homogeneous (x, y, z, w): camera-space depth is
        # component 2. The last component is w, which is always 1 and would
        # make both the z-buffer test and the depth map meaningless.
        cam_z = verts_cam[..., 2]

        # The camera looks down -Z, so -z is the distance in front of it; the
        # rasterizer keeps the smallest value, i.e. the nearest surface.
        # UVs and depth share one rasterization pass.
        attrs = torch.cat([self.face_attrs, cam_z.unsqueeze(-1)], dim=-1)
        feats, face_idx = rasterize(dims[1], dims[0], -cam_z, verts_img, attrs)

        num_uv = self.face_attrs.shape[-1]
        uv_feats = feats[..., :num_uv].contiguous()
        depth = feats[..., num_uv:].clone()

        covered = face_idx > -1
        mask = covered.float()

        if covered.any():
            d_vis = depth[covered]
            d_min, d_max = d_vis.min(), d_vis.max()
            if d_max > d_min:
                # Camera z is negative; the largest value is the nearest point and maps to 1.
                depth[covered] = 0.5 + 0.5 * (d_vis - d_min) / (d_max - d_min)
            else:
                # Constant depth: keep the output inside the documented range.
                depth[covered] = 1.0

        img_feats = texture_sample(uv_feats, self.texture_img).squeeze(1) * mask
        img_feats = img_feats + (1 - mask)  # white background

        return {
            'image': img_feats.permute(0, 3, 1, 2).clamp(0, 1),
            'mask': mask.permute(0, 3, 1, 2),
            'depth': depth.permute(0, 3, 1, 2),
            'render_cache': {'uv_features': uv_feats, 'face_idx': face_idx}
        }

    def export_mesh(self, path, name=''):
        """Write ``{name}mesh.obj``, ``{name}mesh.mtl`` and ``{name}albedo.png`` into ``path``.

        The directory is created when missing. OBJ indices are 1-based.
        """
        os.makedirs(path, exist_ok=True)
        v_np = self.mesh.vertices.cpu().numpy()
        f_np = self.mesh.faces.int().cpu().numpy()
        vt_np = self.vt.cpu().numpy()
        ft_np = self.ft.cpu().numpy()

        tex = self.texture_img.permute(0, 2, 3, 1).clamp(0, 1)[0].detach().cpu().numpy()
        Image.fromarray((tex * 255).astype(np.uint8)).save(f'{path}/{name}albedo.png')

        with open(f'{path}/{name}mesh.obj', 'w') as fp:
            fp.write(f'mtllib {name}mesh.mtl\n')
            fp.writelines(f'v {v[0]} {v[1]} {v[2]}\n' for v in v_np)
            fp.writelines(f'vt {v[0]} {v[1]}\n' for v in vt_np)
            fp.write('usemtl mat0\n')
            fp.writelines(
                f"f {face[0]+1}/{uv_face[0]+1} {face[1]+1}/{uv_face[1]+1} {face[2]+1}/{uv_face[2]+1}\n"
                for face, uv_face in zip(f_np, ft_np)
            )

        with open(f'{path}/{name}mesh.mtl', 'w') as fp:
            fp.write('newmtl mat0\nKa 1 1 1\nKd 1 1 1\nKs 0 0 0\nillum 1\n')
            fp.write(f'map_Kd {name}albedo.png\n')

# =============================================================================
# SD PIPELINE (PyTorch + INT8 Quantization)
# =============================================================================
# NOTE: ONNX doesn't support Depth2Img pipeline (5 channels vs 4)
# Using PyTorch with INT8 quantization instead
sd_pipe = None


def load_pipeline():
    """Return the shared SD-2-Depth pipeline, creating it on first use.

    The module-level ``sd_pipe`` cache is only populated after every setup
    step has succeeded, so a failed attempt is retried on the next call
    instead of handing back a half-initialised pipeline.

    Returns:
        The ready-to-use ``StableDiffusionDepth2ImgPipeline``.

    Raises:
        Exception: whatever the download or setup raised, re-raised after
            the error has been printed.
    """
    global sd_pipe
    if sd_pipe is not None:
        return sd_pipe

    print("\n[INFO] Loading SD-2-Depth pipeline (PyTorch + INT8)...")
    print("[INFO] Note: ONNX not supported for Depth2Img (5-channel UNet)")
    from diffusers import StableDiffusionDepth2ImgPipeline

    try:
        print(f"[1/2] Downloading {SD_MODEL}...")
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            SD_MODEL,
            torch_dtype=DTYPE,
        )
        print("[OK] Model downloaded")

        # Quantize on CPU for faster inference
        if DEVICE == "cpu":
            try:
                from optimum.quanto import quantize, freeze, qint8
                print("[2/2] Applying INT8 quantization to UNet...")
                quantize(pipe.unet, weights=qint8)
                freeze(pipe.unet)
                print("[OK] INT8 quantization applied (3-5x faster than FP32)")
            except ImportError:
                # Quantization is optional; fall back to the unquantized UNet.
                print("[WARN] optimum.quanto not available, using FP32 (slower)")

        pipe = pipe.to(DEVICE)

        if DEVICE == "cpu":
            # Keep the per-step progress bar visible; CPU runs take minutes.
            pipe.set_progress_bar_config(disable=False)
            # Force FP32 for VAE to prevent color artifacts
            pipe.vae = pipe.vae.float()

    except Exception as e:
        print(f"[ERROR] Pipeline loading failed: {e}")
        if "401" in str(e) or "token" in str(e).lower():
            print("[ERROR] Authentication required. Set HF_TOKEN environment variable")
        raise

    # Publish to the cache only once the pipeline is fully configured.
    sd_pipe = pipe
    print("[OK] Pipeline ready!")
    return sd_pipe

# =============================================================================
# MAIN PIPELINE
# =============================================================================
def dilate_texture(tex, mask, iterations=50):
    """Grow filled texels outwards to cover gaps in a texture.

    Each iteration fills the unfilled texels that touch a filled one
    (4-connectivity) with the average of their filled 8-neighbours.

    Args:
        tex: (C, H, W) texture tensor; it is not modified.
        mask: (H, W) tensor that is truthy where ``tex`` already holds data.
        iterations: maximum number of one-texel growth steps.

    Returns:
        Tuple ``(texture, filled)``: the dilated (C, H, W) float tensor and
        the (H, W) boolean mask of texels that now hold data.
    """
    from scipy import ndimage

    result = tex.detach().cpu().numpy().copy()
    filled = mask.detach().cpu().numpy().astype(bool)

    # 8-neighbourhood without the centre texel.
    kernel = np.array([[1, 1, 1], [1, 0, 1], [1, 1, 1]], dtype=np.float32)

    for _ in range(iterations):
        if filled.all():
            break

        # Unfilled texels directly adjacent to filled ones.
        boundary = ndimage.binary_dilation(filled) & ~filled
        if not boundary.any():
            break

        # The neighbour count depends only on the mask, so compute it once
        # per iteration rather than once per channel.
        neighbor_count = ndimage.convolve(filled.astype(np.float32), kernel, mode='constant')
        valid = boundary & (neighbor_count > 0)

        for channel in result:
            # Masking with the pre-iteration ``filled`` keeps texels written
            # in this iteration from feeding into each other.
            neighbor_sum = ndimage.convolve(channel * filled, kernel, mode='constant')
            channel[valid] = neighbor_sum[valid] / neighbor_count[valid]

        filled = filled | boundary

    return torch.from_numpy(result).float(), torch.from_numpy(filled)

def project_to_texture(tex, gen_img, uv, mask, blend=0.7, uv_mask=None):
    """Back-project a generated view into UV space and merge it into the texture.

    Visible pixels are treated as scattered samples at their UV coordinates
    and linearly interpolated onto the texel grid. Texels are sampled at
    their centres, matching the ``align_corners=False`` convention used by
    ``texture_sample``.

    Args:
        tex: (1, 3, TH, TW) current texture; it is not modified.
        gen_img: (1, 3, H, W) generated image with values in [0, 1].
        uv: (1, H, W, 2) UV coordinate rendered at every pixel.
        mask: (1, 1, H, W) visibility mask; values above 0.5 count as visible.
        blend: weight of the new colour on texels that already hold data.
        uv_mask: (TH, TW) boolean coverage accumulated so far, or None.

    Returns:
        Tuple ``(texture, uv_mask)`` with the updated texture and coverage.
        When fewer than 10 pixels are visible, an unchanged copy of ``tex``
        and the incoming ``uv_mask`` are returned.
    """
    from scipy.interpolate import LinearNDInterpolator

    _, _, TH, TW = tex.shape
    # Detach so in-place writes never touch the autograd graph of a Parameter.
    new_tex = tex.detach().clone()

    mask_f = mask[0, 0].reshape(-1).detach().cpu().numpy()
    uv_f = uv[0].reshape(-1, 2).detach().cpu().numpy()
    gen_np = gen_img[0].permute(1, 2, 0).reshape(-1, 3).detach().cpu().numpy()

    vis = mask_f > 0.5
    if vis.sum() < 10:
        return new_tex, uv_mask

    # Scattered source samples; V is flipped because image row 0 is V = 1.
    src_uv = uv_f[vis].astype(np.float64)
    src_uv[:, 1] = 1.0 - src_uv[:, 1]
    src_colors = gen_np[vis]

    # Texel centres of the destination grid.
    tx = (np.arange(TW) + 0.5) / TW
    ty = (np.arange(TH) + 0.5) / TH
    grid_x, grid_y = np.meshgrid(tx, ty)

    # One triangulation serves all three colour channels.
    interpolator = LinearNDInterpolator(src_uv, src_colors, fill_value=np.nan)
    proj_tex = interpolator(grid_x, grid_y)  # (TH, TW, 3)

    # Texels outside the convex hull of the samples come back as NaN.
    proj_mask = ~np.isnan(proj_tex).any(axis=-1)
    proj_tex = np.nan_to_num(proj_tex, nan=0.5).astype(np.float32)

    if uv_mask is None:
        uv_mask = torch.zeros(TH, TW, dtype=torch.bool, device=new_tex.device)

    proj_mask_t = torch.from_numpy(proj_mask).to(new_tex.device)
    proj_tex_t = torch.from_numpy(proj_tex).permute(2, 0, 1).to(new_tex.device, new_tex.dtype)

    # First-time texels take the new colour; previously covered ones are blended.
    new_pixels = proj_mask_t & ~uv_mask
    existing_pixels = proj_mask_t & uv_mask

    layer = new_tex[0]
    layer[:, new_pixels] = proj_tex_t[:, new_pixels]
    layer[:, existing_pixels] = (
        blend * proj_tex_t[:, existing_pixels] + (1 - blend) * layer[:, existing_pixels]
    )

    uv_mask = uv_mask | proj_mask_t
    return new_tex, uv_mask


def finalize_texture(tex, uv_mask, iterations=100):
    """Fill the texels no view reached by dilating the covered region.

    Args:
        tex: (1, C, TH, TW) texture; it is not modified.
        uv_mask: (TH, TW) boolean coverage mask, or None when no view
            contributed any texel.
        iterations: maximum number of dilation steps.

    Returns:
        A new (1, C, TH, TW) texture. With no coverage at all there is
        nothing to grow from, so an unchanged copy is returned.
    """
    result = tex.detach().clone()

    # Nothing was ever projected (e.g. every view saw too few pixels).
    if uv_mask is None or not bool(uv_mask.any()):
        return result

    dilated, _ = dilate_texture(result[0], uv_mask, iterations=iterations)
    result[0] = dilated
    return result

def generate_texture(mesh_file, prompt, num_views, num_steps, seed, progress=gr.Progress()):
    """Texture an uploaded mesh from a text prompt (Gradio click handler).

    For each camera view the mesh is rendered to a depth map, SD-2-Depth
    generates an image conditioned on it, and the image is projected back
    into the UV texture. Remaining gaps are filled by dilation.

    Args:
        mesh_file: uploaded mesh, either a path or an object with ``.name``.
        prompt: text description of the desired texture.
        num_views: number of camera views to use (at most six are defined).
        num_steps: diffusion steps per view; falls back to ``NUM_INFERENCE_STEPS``.
        seed: random seed, reused for every view.
        progress: Gradio progress tracker.

    Returns:
        Tuple ``(texture_image, previews, zip_path)``: the final UV texture,
        a list with the 3D preview followed by the generated views, and the
        path of a ZIP holding the mesh, material and textures.

    Raises:
        gr.Error: on missing input or when any processing step fails.
    """
    if mesh_file is None:
        raise gr.Error("Please upload a mesh file!")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a text prompt!")

    # (polar angle, azimuth, prompt hint). The hint is stored with its camera
    # so the two extra views at azimuth 0 are not mislabelled as side views.
    view_specs = [
        (0.5, 0.0, "front"),
        (0.5, np.pi / 2, "right side"),
        (0.5, np.pi, "back"),
        (0.5, -np.pi / 2, "left side"),
        (0.2, 0.0, "top"),
        (0.8, 0.0, "front"),
    ]

    # The directory must outlive a successful call because the returned ZIP
    # is served from it; it is removed only when the run fails.
    temp_dir = tempfile.mkdtemp()

    try:
        # Sliders may deliver floats, which cannot be used as slice bounds.
        n_views = int(num_views) if num_views else NUM_VIEWS
        viewpoints = view_specs[:max(1, n_views)]
        steps = int(num_steps) if num_steps else NUM_INFERENCE_STEPS

        # Older Gradio versions pass a file object instead of a path.
        src_path = getattr(mesh_file, "name", mesh_file)
        mesh_ext = os.path.splitext(src_path)[1].lower()
        mesh_path = os.path.join(temp_dir, f"mesh{mesh_ext}")
        shutil.copy(src_path, mesh_path)

        progress(0.1, desc="Creating UV map...")
        config = MeshConfig(shape_path=mesh_path, texture_resolution=TEXTURE_RESOLUTION)
        model = TexturedMeshModel(config, RENDER_SIZE, Path(temp_dir) / 'cache', 'cpu')

        progress(0.2, desc="Loading SD-2-Depth...")
        pipe = load_pipeline()

        with torch.no_grad():
            model.texture_img.fill_(0.5)  # Start with neutral gray instead of white

        previews = []
        uv_mask = None  # cumulative UV coverage across views
        for i, (theta, phi, direction) in enumerate(viewpoints):
            progress(0.3 + 0.5 * i / len(viewpoints), desc=f"View {i+1}/{len(viewpoints)}...")

            # No gradients are needed; skip building an autograd graph.
            with torch.no_grad():
                result = model.render(theta, phi, 2.0, (RENDER_SIZE, RENDER_SIZE))
            depth = result['depth'][0, 0].cpu().numpy()
            mask = result['mask'][0, 0].cpu().numpy()

            # Stretch the visible depth range to [0, 1] and zero the background.
            if mask.sum() > 0:
                d_vis = depth[mask > 0]
                d_min, d_max = d_vis.min(), d_vis.max()
                if d_max > d_min:
                    depth = (depth - d_min) / (d_max - d_min)
                depth = depth * mask

            depth_img = Image.fromarray((np.clip(depth, 0, 1) * 255).astype(np.uint8)).convert('RGB')

            gen = torch.Generator(device=DEVICE).manual_seed(int(seed))  # Same seed for consistency
            # SD-2-Depth: native depth conditioning (same as original TEXTure)
            textured = pipe(
                prompt=f"{prompt}, {direction} view, consistent style",
                image=depth_img,
                strength=0.85,  # Slightly less strength for more depth adherence
                num_inference_steps=steps,
                guidance_scale=7.5,
                generator=gen
            ).images[0]
            previews.append(textured)

            uv = result['render_cache']['uv_features']
            gen_t = torch.tensor(np.array(textured)).float().permute(2, 0, 1).unsqueeze(0) / 255.0

            with torch.no_grad():
                model.texture_img.data, uv_mask = project_to_texture(
                    model.texture_img, gen_t, uv, result['mask'],
                    blend=0.5, uv_mask=uv_mask
                )

        progress(0.85, desc="Filling gaps...")

        # Final dilation to fill any remaining gaps
        with torch.no_grad():
            model.texture_img.data = finalize_texture(model.texture_img, uv_mask, iterations=150)

        progress(0.9, desc="Saving...")

        tex_np = model.texture_img[0].permute(1, 2, 0).clamp(0, 1).detach().cpu().numpy()
        tex_img = Image.fromarray((tex_np * 255).astype(np.uint8))
        tex_img.save(f'{temp_dir}/uv_texture.png')

        # Render 3D preview with texture
        with torch.no_grad():
            preview_result = model.render(0.4, 0.3, 2.5, (512, 512))
        preview_np = preview_result['image'][0].permute(1, 2, 0).clamp(0, 1).detach().cpu().numpy()
        preview_img = Image.fromarray((preview_np * 255).astype(np.uint8))
        previews.insert(0, preview_img)  # Add 3D preview as first image

        model.export_mesh(f'{temp_dir}/mesh', '')

        zip_path = f'{temp_dir}/textured_mesh.zip'
        with zipfile.ZipFile(zip_path, 'w') as zf:
            for f in ['mesh/albedo.png', 'mesh/mesh.obj', 'mesh/mesh.mtl', 'uv_texture.png']:
                if os.path.exists(f'{temp_dir}/{f}'):
                    zf.write(f'{temp_dir}/{f}', os.path.basename(f))

        progress(1.0, desc="Done!")
        return tex_img, previews, zip_path

    except gr.Error:
        # Already user-facing; just release the scratch directory.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    except Exception as e:
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise gr.Error(f"Error: {str(e)}") from e

# =============================================================================
# GRADIO UI
# =============================================================================
# Two-column layout: inputs and the trigger button on the left, results on the right.
with gr.Blocks(title="TEXTure CPU Lite") as demo:
    gr.Markdown("""# TEXTure CPU Lite
Generate UV texture maps for 3D meshes using text prompts.

⚠️ **Quality Notice:** This is a simplified CPU-only demo. Results are significantly worse than the [original TEXTure paper](https://texturepaper.github.io/TEXTurePaper/).

**Why it looks bad:**
- No Kaolin GPU rasterizer → using slow software renderer with lower precision
- No proper view weighting → seams between views are visible
- No texture inpainting → blotchy patches instead of smooth transitions
- No refinement passes → single-pass projection loses detail
- INT8 quantization on CPU → color artifacts possible

**For production quality:** Use the [original TEXTure repo](https://github.com/TEXTurePaper/TEXTurePaper) with a GPU.
""")

    with gr.Row():
        with gr.Column():
            # The label lists every extension accepted by ``file_types``.
            mesh_in = gr.File(label="3D Mesh (.obj, .stl, .ply, .glb, .off)", file_types=[".obj", ".stl", ".ply", ".glb", ".off"])
            prompt_in = gr.Textbox(label="Texture Prompt", placeholder="ceramic with blue and white pattern", lines=2)
            with gr.Row():
                views_in = gr.Slider(2, 6, value=4, step=1, label="Views")
                steps_in = gr.Slider(5, 25, value=20, step=1, label="Steps (5=fast, 20=quality)")
            with gr.Row():
                seed_in = gr.Number(value=42, label="Seed", precision=0)
            btn = gr.Button("Generate", variant="primary")
            gr.Markdown("**CPU Time:** ~1.5 min/view @ 10 steps, ~3 min/view @ 20 steps")

        with gr.Column():
            tex_out = gr.Image(label="UV Texture", type="pil")
            gallery_out = gr.Gallery(label="3D Preview + Generated Views", columns=2, height=250)
            zip_out = gr.File(label="Download (ZIP)")

    # Inputs and outputs follow the parameter and return order of generate_texture.
    btn.click(generate_texture, [mesh_in, prompt_in, views_in, steps_in, seed_in], [tex_out, gallery_out, zip_out])

    gr.Markdown("**Credits:** [TEXTure Paper](https://texturepaper.github.io/TEXTurePaper/), [SD-2-Depth](https://huggingface.co/radames/stable-diffusion-2-depth-img2img), [xatlas](https://github.com/jpcy/xatlas)")

if __name__ == "__main__":
    # At most two requests may wait in the queue at a time.
    demo.queue(max_size=2).launch(ssr_mode=False)