maple-shaft committed on
Commit
88ef5ec
·
verified ·
1 Parent(s): 31e32d0

Only mv endpoint handler

Browse files
Files changed (1) hide show
  1. handler.py +126 -285
handler.py CHANGED
@@ -1,285 +1,126 @@
1
- # This is a custom handler module for the forked HF repo maple-shaft/zero123plus-v1.2
2
- # Inference Endpoint hosting on HF will require this file and requirements.txt to be uploaded to the repo in the root.
3
-
4
- from typing import Dict, List, Any
5
- import os
6
- import gc
7
- import psutil
8
- import torch
9
- import base64
10
- import io
11
- from PIL import Image
12
- import trimesh
13
- import tempfile
14
- import pymeshlab as ml
15
- from hy3dgen.rembg import BackgroundRemover
16
- from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
17
- from hy3dgen.texgen import Hunyuan3DPaintPipeline
18
- from diffusers.pipelines.auto_pipeline import AutoPipelineForText2Image
19
- from diffusers import DiffusionPipeline # pyright: ignore[reportPrivateImportUsage]
20
-
21
- def log_ram(tag):
22
- rss = psutil.Process(os.getpid()).memory_info().rss / (1024**3)
23
- print(f"[{tag}] RSS: {rss:.2f} GB", flush=True)
24
-
25
- class HFMultiViewGen:
26
-
27
- def __init__(self,
28
- hf_token: str,
29
- mv_model: str = "maple-shaft/zero123plus-v1.2",
30
- mv_custom_pipeline: str = "sudo-ai/zero123plus-pipeline",
31
- gen_custom_pipeline: str = "",
32
- debug: bool = False):
33
- self.debug = debug
34
- self.hf_token = hf_token
35
- self.mv_model = mv_model
36
- self.mv_custom_pipeline = mv_custom_pipeline
37
-
38
- self.img_to_mesh_model_parent_name = "tencent/Hunyuan3D-2"
39
- self.img_to_mesh_model_name = "tencent/Hunyuan3D-2mv"
40
- self.img_to_mesh_sub_name = "hunyuan3d-dit-v2-mv-turbo"
41
- self.mesh_paint_sub_name = "hunyuan3d-paint-v2-0-turbo"
42
- self.mesh_delight_sub_name = "hunyuan3d-delight-v2-0"
43
- self.mesh_vae_sub_name = "hunyuan3d-vae-v2-0-turbo"
44
-
45
- print(f"torch.cuda.is_available() = {torch.cuda.is_available()}")
46
- torch.cuda.synchronize()
47
- print("GPU SYNC OK", flush=True)
48
-
49
- self.pipe = DiffusionPipeline.from_pretrained(
50
- self.mv_model,
51
- token=self.hf_token,
52
- custom_pipeline=self.mv_custom_pipeline,
53
- torch_dtype=torch.float16,
54
- trust_remote_code=True
55
- )
56
-
57
- self.mesh_pipe: Hunyuan3DDiTFlowMatchingPipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
58
- self.img_to_mesh_model_name,
59
- subfolder=self.img_to_mesh_sub_name,
60
- variant='fp16',
61
- )
62
-
63
- self.tex_pipe = Hunyuan3DPaintPipeline.from_pretrained(
64
- self.img_to_mesh_model_parent_name
65
- )
66
- self.tex_pipe.config.render_size = 1024
67
- self.tex_pipe.config.texture_size = 1024
68
- self.tex_pipe.render.set_default_render_resolution(self.tex_pipe.config.render_size)
69
- self.tex_pipe.render.set_default_texture_resolution(self.tex_pipe.config.texture_size)
70
-
71
- def preprocess_images_for_mesh(self, images: dict[str, Image.Image]) -> dict[str, Image.Image]:
72
- ret = {}
73
- for k, v in images.items():
74
- if v.mode == 'RGB':
75
- rembg = BackgroundRemover()
76
- v = rembg(v)
77
- ret[k] = v.resize((512,512), Image.LANCZOS).convert("RGBA")
78
- return ret
79
-
80
- def free_gpu(self, pipe):
81
- log_ram("before free_gpu")
82
- gc.collect()
83
- pipe.to("cpu")
84
- torch.cuda.empty_cache()
85
- torch.cuda.ipc_collect()
86
- torch.cuda.synchronize()
87
- log_ram("after free_gpu")
88
-
89
- def allocate_gpu(self, pipe):
90
- log_ram("before allocate_gpu")
91
- pipe.to("cuda")
92
- torch.cuda.synchronize()
93
- log_ram("after allocate_gpu")
94
-
95
- def simplify_mesh(self, mesh: trimesh.Trimesh) -> trimesh.Trimesh | None:
96
- obj_bytes = mesh.export(file_type="ply")
97
- ms = ml.MeshSet()
98
- tf = None
99
- remeshed_tf = None
100
- try:
101
- tf = tempfile.NamedTemporaryFile(delete=False, suffix=".ply")
102
- tf.write(obj_bytes)
103
- tf.flush()
104
- ms.load_new_mesh(tf.name)
105
- # Step 1: Optional smoothing (to mimic voxel smooth effect)
106
- ms.apply_filter(
107
- "apply_coord_laplacian_smoothing",
108
- stepsmoothnum=3
109
- )
110
-
111
- # Step 2: Uniform resampling for smooth remeshing
112
- # This is the closest PyMeshLab has to Blender's smooth voxel remesh.
113
- ms.apply_filter(
114
- "generate_resampled_uniform_mesh",
115
- cellsize=ml.PureValue(ms.current_mesh().bounding_box().diagonal() / (2 ** 5)), # roughly matches octree depth=5
116
- offset=ml.PureValue(0.0),
117
- multisample=True
118
- )
119
-
120
- # Step 3: Optional shrink/scale adjustment (Blender’s scale=0.9)
121
- #ms.apply_filter("transform_scale_normalize", scalefactor=0.9)
122
-
123
- # Step 4: Remove small disconnected pieces
124
- ms.apply_filter("compute_selection_by_small_disconnected_components_per_face")
125
- ms.apply_filter("meshing_remove_selected_vertices_and_faces")
126
- # Step 5: (Optional) Smooth again to even out voxel edges
127
- ms.apply_filter("apply_coord_taubin_smoothing", stepsmoothnum=10, lambda_=0.5, mu=-0.53)
128
-
129
- remeshed_tf = tempfile.NamedTemporaryFile(delete=False, suffix=".ply")
130
- ms.save_current_mesh(remeshed_tf.name)
131
- remeshed_tf.flush()
132
- remeshed: trimesh.Trimesh = trimesh.load_mesh(remeshed_tf, file_type="ply")
133
- remeshed = remeshed.process(validate=True, merge_norm=True)
134
-
135
- print(f"is_watertight = {remeshed.is_watertight}", flush=True)
136
- print(f"is_volume = {remeshed.is_volume}", flush=True)
137
- print(f"euler_number = {remeshed.euler_number}", flush=True)
138
- return remeshed
139
- except Exception as e:
140
- print(e)
141
- finally:
142
- if tf:
143
- tf.close()
144
- os.remove(tf.name)
145
- del tf
146
- if remeshed_tf:
147
- remeshed_tf.close()
148
- os.remove(remeshed_tf.name)
149
- del remeshed_tf
150
-
151
-
152
- def generate_multiview(self, initial: Image.Image) -> dict[str, Image.Image]:
153
- print(">>> generate_multiview", flush=True)
154
-
155
- self.free_gpu(self.mesh_pipe)
156
- self.free_gpu(self.tex_pipe)
157
- self.allocate_gpu(self.pipe)
158
-
159
- print("allocated second pipe to gpu", flush=True)
160
- # --- prepare image properly ---
161
- img = initial.convert("RGB")
162
-
163
- print("converted the image to RGB", flush=True)
164
-
165
- mv_result : List[Image.Image] = self.pipe(
166
- image=img,
167
- width=640,
168
- height=960,
169
- num_inference_steps=28,
170
- guidance_scale=4.0,
171
- num_images_per_prompt=1
172
- ).images # pyright: ignore[reportCallIssue]
173
-
174
- print("mv_result", repr(mv_result), flush=True)
175
-
176
- # The resulting file comes back as a 2x3 tiled PNG image, we will need to split it into a set of images
177
- tile_w = 320.0 # img.width / 2.0
178
- tile_h = 320.0 # img.height / 3.0
179
- right_tile = (tile_w, 0.0, tile_w * 2.0, tile_h)
180
- back_tile = (tile_w, tile_h, tile_w * 2.0, tile_h * 2.0)
181
- left_tile = (0, tile_h * 2.0, tile_w, tile_h * 3.0)
182
- ret = {
183
- "front": img,
184
- "right": mv_result[0].crop(right_tile),
185
- "back": mv_result[0].crop(back_tile),
186
- "left": mv_result[0].crop(left_tile)
187
- }
188
-
189
- return ret
190
-
191
- def create_mesh(self, images: dict[str, Image.Image]) -> trimesh.Trimesh | None:
192
- print(">>> Entered create_mesh", flush=True)
193
-
194
- self.free_gpu(self.pipe)
195
- self.free_gpu(self.tex_pipe)
196
- self.allocate_gpu(self.mesh_pipe)
197
-
198
- timages = self.preprocess_images_for_mesh(images)
199
-
200
- # Mesh Pipeline
201
- mesh: trimesh.Trimesh = self.mesh_pipe(
202
- image=timages,
203
- num_inference_steps=10,
204
- octree_resolution=120,
205
- num_chunks=2000,
206
- output_type='trimesh'
207
- )[0]
208
- simplified_mesh = self.simplify_mesh(mesh)
209
- return simplified_mesh
210
-
211
- def texture_mesh(self, mesh: trimesh.Trimesh, preprocessed_front_image: Image.Image) -> trimesh.Trimesh | None:
212
- print(">>> call texture_mesh", flush=True)
213
-
214
- self.free_gpu(self.pipe)
215
- self.free_gpu(self.mesh_pipe)
216
- self.allocate_gpu(self.tex_pipe)
217
-
218
- return self.tex_pipe(mesh=mesh, image=preprocessed_front_image)
219
-
220
- class EndpointHandler():
221
- def __init__(self, path=""):
222
- self.hf_token = os.environ["HUGGINGFACE_TOKEN"]
223
- self.hf_gen = HFMultiViewGen(hf_token=self.hf_token)
224
-
225
- def convert(self, fromval: dict[str, Image.Image]) -> dict[str, str]:
226
- ret: dict[str, str] = {}
227
- for k,v in fromval.items():
228
- with io.BytesIO() as bio:
229
- v.save(bio, format="PNG")
230
- ret[k] = base64.b64encode(bio.getvalue()).decode()
231
-
232
- return ret
233
-
234
- def convert_img(self, fromval: str) -> Image.Image:
235
- try:
236
- print(">>> convert_img", flush=True)
237
- with io.BytesIO(base64.b64decode(fromval)) as bio:
238
- return Image.open(bio.getvalue())
239
- except Exception as e:
240
- print("Error", repr(e), flush=True)
241
- raise e
242
-
243
- def convert_mesh(self, fromval: trimesh.Trimesh) -> str | None:
244
- print(">>> call convert_mesh", flush=True)
245
- try:
246
- ret: str | None = None
247
- tf = tempfile.NamedTemporaryFile("w+b", suffix=".glb", delete=False)
248
- tf_name: str = tf.name
249
- fromval.export(tf.name)
250
- tf.flush()
251
- tf.close()
252
- with open(tf_name, "r+b") as f:
253
- ret = base64.b64encode(f.read()).decode()
254
- os.remove(tf.name)
255
- return ret
256
- except Exception as e:
257
- print("Error", repr(e), flush=True)
258
- raise e
259
-
260
- def __call__(self, data: Dict[str, Any]):
261
- print("Entered __call__!!! ", repr(data), flush=True)
262
- ret: dict[str, str] = {}
263
- try:
264
- img_str = data['inputs']
265
- print(f"Initial image: {img_str}", flush=True)
266
- img: Image.Image = self.convert_img(fromval=img_str)
267
- print("Converted to image", repr(img), flush=True)
268
- mv: dict[str, Image.Image] = self.hf_gen.generate_multiview(initial=img)
269
- print(f"Mv Image: {mv}", flush=True)
270
- mesh: trimesh.Trimesh | None = self.hf_gen.create_mesh(images=mv)
271
- print(f"Created to mesh: {mesh}", flush=True)
272
- if not mesh:
273
- raise Exception("No mesh")
274
- mesh = self.hf_gen.texture_mesh(mesh=mesh, preprocessed_front_image=img)
275
- print(f"Textured mesh: {mesh}", flush=True)
276
- if not mesh:
277
- raise Exception("No mesh")
278
- output: str | None = self.convert_mesh(fromval=mesh)
279
- if not output:
280
- raise Exception("No output")
281
- ret["output"] = output
282
- return ret
283
- except Exception as e:
284
- print(e)
285
- raise e
 
1
+ from typing import Dict, List, Any
2
+ import os
3
+ import torch
4
+ from PIL import Image
5
+ import dotenv
6
+ import base64
7
+ import io
8
+ from diffusers import DiffusionPipeline # pyright: ignore[reportPrivateImportUsage]
9
+
10
+ dotenv.load_dotenv()
11
+
12
def convert_b64_to_image(from_str: str) -> Image.Image:
    """Decode a base64-encoded PNG payload into a fully loaded PIL image.

    Raises whatever ``base64``/PIL raises on malformed input, after logging it.
    """
    print(">>> call convert_b64_to_image", flush=True)
    try:
        raw: bytes = base64.b64decode(from_str)
        with io.BytesIO(raw) as stream:
            image = Image.open(stream, formats=["PNG"])
            # Force pixel data into memory now, before the BytesIO closes.
            image.load()
        return image
    except Exception as e:
        print(e, flush=True)
        raise e
25
def convert_image_to_b64(from_img: Image.Image) -> str:
    """Serialize a PIL image to PNG and return it as a base64 ASCII string."""
    print(">>> call convert_image_to_b64", flush=True)
    try:
        with io.BytesIO() as sink:
            from_img.save(sink, format="PNG")
            png_bytes = sink.getvalue()
        return base64.b64encode(png_bytes).decode("utf-8")
    except Exception as e:
        print(e, flush=True)
        raise e
36
class HFMultiViewGen:
    """Hosts the zero123plus multi-view diffusion pipeline for endpoint inference.

    Loads the pipeline once at construction (in fp16, on CUDA) and exposes
    :meth:`generate_multiview` to turn a single front image into a small set
    of side/back views.
    """

    def __init__(self,
                 hf_token: str,
                 mv_model: str = "maple-shaft/zero123plus-v1.2",
                 mv_custom_pipeline: str = "sudo-ai/zero123plus-pipeline",
                 gen_custom_pipeline: str = "",
                 repo_dir: str = "/repository",
                 debug: bool = False):
        """
        Args:
            hf_token: HuggingFace access token used to download the model.
            mv_model: Hub repo id of the multi-view diffusion weights.
            mv_custom_pipeline: Hub repo id of the custom pipeline code.
            gen_custom_pipeline: Unused here; kept for interface compatibility.
            repo_dir: Local cache directory for downloaded weights.
            debug: Flag stored for callers; not consulted in this class.
        """
        self.debug = debug
        self.hf_token = hf_token
        self.mv_model = mv_model
        self.mv_custom_pipeline = mv_custom_pipeline
        self.repo_dir = repo_dir

        print(f"torch.cuda.is_available() = {torch.cuda.is_available()}")
        # torch.cuda.synchronize() raises on a CPU-only host; guard it so the
        # failure (if any) happens at the clearer .to("cuda") call below.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
            print("GPU SYNC OK", flush=True)

        # BUG FIX: from_pretrained takes `torch_dtype`, not `dtype` — the
        # previous revision used `torch_dtype`; with `dtype` the fp16 request
        # is silently dropped and the weights load in fp32 (double memory).
        # `trust_remote_code=True` is required to execute the hub-hosted
        # custom pipeline code.
        self.pipe = DiffusionPipeline.from_pretrained(
            self.mv_model,
            cache_dir=self.repo_dir,
            token=self.hf_token,
            custom_pipeline=self.mv_custom_pipeline,
            torch_dtype=torch.float16,
            trust_remote_code=True,
        ).to("cuda")

    def generate_multiview(self, initial: Image.Image) -> dict[str, Image.Image]:
        """Generate right/back/left views for a single front-facing image.

        Args:
            initial: Source image; converted to RGB before conditioning.

        Returns:
            Mapping of view name ("front", "right", "back", "left") to a PIL
            image. "front" is the RGB-converted input itself.
        """
        print(">>> generate_multiview", flush=True)

        # The pipeline expects an RGB conditioning image.
        img = initial.convert("RGB")
        print("converted the image to RGB", flush=True)

        mv_result: List[Image.Image] = self.pipe(
            image=img,
            width=640,
            height=960,
            num_inference_steps=28,
            guidance_scale=4.0,
            num_images_per_prompt=1
        ).images  # pyright: ignore[reportCallIssue]

        print("mv_result", repr(mv_result), flush=True)

        # The result is a single 640x960 image tiled as a 2-column x 3-row
        # grid of 320x320 views; crop out the tiles we need.
        tile_w = 320.0  # img.width / 2.0
        tile_h = 320.0  # img.height / 3.0
        right_tile = (tile_w, 0.0, tile_w * 2.0, tile_h)
        back_tile = (tile_w, tile_h, tile_w * 2.0, tile_h * 2.0)
        left_tile = (0, tile_h * 2.0, tile_w, tile_h * 3.0)
        return {
            "front": img,
            "right": mv_result[0].crop(right_tile),
            "back": mv_result[0].crop(back_tile),
            "left": mv_result[0].crop(left_tile),
        }
98
class EndpointHandler():
    """HF Inference Endpoint entry point: base64 PNG in, multi-view base64 PNGs out."""

    def __init__(self, path=""):
        # Both env vars are required; KeyError here intentionally fails startup.
        self.hf_token = os.environ["HUGGINGFACE_TOKEN"]
        self.repo_dir = os.environ["HF_HUB_CACHE"]
        self.hf_gen = HFMultiViewGen(hf_token=self.hf_token, repo_dir=self.repo_dir)

    def convert(self, fromval: dict[str, Image.Image]) -> dict[str, str]:
        """Encode every image in the mapping as a base64 PNG string."""
        return {k: convert_image_to_b64(v) for k, v in fromval.items()}

    def __call__(self, data: Dict[str, Any]):
        """Handle one inference request.

        Args:
            data: Request payload; ``data['inputs']`` must be a base64 PNG.

        Returns:
            ``{"output": {view_name: base64_png, ...}}``.
        """
        print("Entered __call__!!! ", repr(data), flush=True)
        try:
            img_str = data['inputs']
            # Log only a prefix — the full base64 payload can be megabytes
            # and would flood the endpoint logs.
            print(f"Initial image: {img_str[:64]}...", flush=True)
            img: Image.Image = convert_b64_to_image(img_str)
            print("Converted to image", repr(img), flush=True)
            mv: dict[str, Image.Image] = self.hf_gen.generate_multiview(initial=img)
            print(f"Mv Image: {mv}", flush=True)
            return {"output": self.convert(mv)}
        except Exception as e:
            print(e, flush=True)
            # Bare raise preserves the original traceback without adding a frame.
            raise