import os
import tempfile
from typing import Tuple

import gradio as gr
import numpy as np
import torch
import cv2

from moge.model.v2 import MoGeModel

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# File name used for every exported GLB. Each export lives in its own
# temporary directory, so the name can stay constant without collisions.
GLB_FILENAME = "output.glb"


# ---------- Model setup ----------
@torch.no_grad()
def load_model() -> MoGeModel:
    """Load the MoGe model, move it to ``DEVICE`` and put it in eval mode.

    NOTE:
    - If there is a dedicated mesh checkpoint (e.g. "Ruicheng/moge-2-vitl-mesh"),
      use that ID here.
    - If not, keep the normal one and use the mesh reconstruction API on it.

    Returns:
        The loaded model, ready for inference.
    """
    print(f"Loading MoGe model on device: {DEVICE}")
    # If there is a mesh-specific checkpoint, change this string accordingly.
    model = MoGeModel.from_pretrained("Ruicheng/moge-2-vitl-normal")
    model = model.to(DEVICE)
    model.eval()
    return model


# Loaded once at import time so every request reuses the same weights.
MODEL = load_model()


# ---------- Helper: run MoGe mesh reconstruction ----------
def _as_bytes(payload):
    """Return *payload* as bytes, UTF-8 encoding it when it is a ``str``."""
    if isinstance(payload, str):
        return payload.encode("utf-8")
    return payload


@torch.no_grad()
def run_moge_mesh(image: np.ndarray) -> bytes:
    """Run mesh reconstruction on one image and return the GLB payload.

    Args:
        image: HxWx3 RGB uint8 numpy array.

    Returns:
        Binary GLB data (intended: texture baked, resolution ~256).

    Raises:
        ValueError: If ``image`` is not a 3-D array with 3 channels.
        RuntimeError: If the model result has no structure this function
            knows how to turn into GLB bytes.
    """
    # Fail early with a readable message instead of an opaque permute/model
    # error when the input is grayscale, RGBA, or otherwise mis-shaped.
    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError(
            f"Expected an HxWx3 RGB image, got array with shape {image.shape}"
        )

    # Convert to float [0,1], CHW, batch -> (1,3,H,W)
    img = image.astype(np.float32) / 255.0
    tensor = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0).to(DEVICE)

    # ---- IMPORTANT PART: call the mesh reconstruction API ----
    #
    # You need to adjust THIS CALL to match the actual MoGe code.
    # Look for something like:
    #   - MODEL.reconstruct_mesh(...)
    #   - MODEL.mesh_reconstruct(...)
    #   - MODEL.infer_mesh(...)
    #
    # And for arguments, look for:
    #   - mesh_resolution / grid_resolution
    #   - texture_size / tex_size
    #   - enable_texture / with_texture
    #
    # TEMPLATE call - this will almost certainly need renaming:
    result = MODEL.reconstruct_mesh(
        tensor,
        mesh_resolution=256,  # 256^3 grid or equivalent
        texture_size=256,  # 256x256 texture
        enable_texture=True,  # or with_texture=True, etc.
    )

    # ---- Inspect result structure (one-time debugging) ----
    # While debugging, keep this print to see the keys in the Space logs.
    print("MoGe mesh result keys:", list(result.keys()))

    # Common patterns:
    #   1) result["glb"]  -> raw GLB bytes
    #   2) result["mesh"] -> mesh object (trimesh / internal) with export method

    # Case 1: GLB bytes directly
    if "glb" in result:
        return _as_bytes(result["glb"])

    # Case 2: mesh object with export method
    if "mesh" in result:
        mesh = result["mesh"]
        tex = result.get("texture", None)

        # If MoGe mesh exposes something like
        # `to_glb(texture=..., texture_size=256)`:
        if hasattr(mesh, "to_glb"):
            if tex is not None:
                glb_bytes = mesh.to_glb(texture=tex, texture_size=256)
            else:
                glb_bytes = mesh.to_glb(texture_size=256)
            return _as_bytes(glb_bytes)

        # Or if it expects file export:
        if hasattr(mesh, "export"):
            # Export into a private scratch directory so concurrent requests
            # never share a path; the directory is removed once the bytes
            # have been read back.
            with tempfile.TemporaryDirectory(prefix="moge_mesh_") as tmp_dir:
                tmp_path = os.path.join(tmp_dir, GLB_FILENAME)
                if tex is not None:
                    # This is pseudocode - adapt to the actual mesh.export
                    # signature.
                    mesh.export(tmp_path, texture=tex, texture_size=256)
                else:
                    mesh.export(tmp_path)
                with open(tmp_path, "rb") as f:
                    return f.read()

    raise RuntimeError(
        f"Unsupported MoGe mesh result structure: keys={list(result.keys())}"
    )


# ---------- Gradio inference function ----------
def infer_and_export_glb(image: np.ndarray) -> str:
    """Reconstruct a mesh from *image* and write it to a GLB file.

    Args:
        image: The uploaded image as a numpy array, or ``None`` when the user
            submitted without uploading anything.

    Returns:
        Path of the written GLB file.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image.")

    glb_bytes = run_moge_mesh(image)

    # One fresh directory per request: a fixed path in the working directory
    # would let simultaneous requests overwrite each other's output. The
    # directory is intentionally left in place so the file still exists when
    # it is served.
    # NOTE(review): consider periodic cleanup of these directories.
    out_dir = tempfile.mkdtemp(prefix="moge_glb_")
    glb_path = os.path.join(out_dir, GLB_FILENAME)
    with open(glb_path, "wb") as f:
        f.write(glb_bytes)

    return glb_path


# ---------- Gradio app ----------
title = "MoGe 3D Reconstruction → Textured GLB (256)"
description = (
    "Upload an image. MoGe reconstructs a textured 3D mesh and exports it as a GLB "
    "with a ~256x256 texture."
)

demo = gr.Interface(
    fn=infer_and_export_glb,
    inputs=gr.Image(type="numpy", label="Input image"),
    outputs=gr.File(label="Download GLB (textured mesh)"),
    title=title,
    description=description,
)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)