Spaces:

thisaraniana
/

prompt2shape

Running

App Files Community

thisaraniana committed about 1 month ago

Commit

19b13e0

1 Parent(s): 433d974

Fix: implement lazy loading for CUDA pipelines to support Hugging Face ZeroGPU

Browse files

Files changed (1) hide show

common.py +67 -54

common.py CHANGED Viewed

@@ -88,65 +88,75 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
 os.environ.setdefault("OPENAI_API_KEY", "sk-placeholder")
 MAX_SEED = 100000
-# DELIGHT = DelightingModel()
-# IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
-# IMAGESR_MODEL = ImageStableSR()
-if os.getenv("GRADIO_APP").startswith("imageto3d"):
-    RBG_REMOVER = RembgRemover()
-    RBG14_REMOVER = BMGG14Remover()
-    SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
-    if "sam3d" in os.getenv("GRADIO_APP"):
-        PIPELINE = Sam3dInference(device="cuda")
-    else:
-        PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
-            "microsoft/TRELLIS-image-large"
         )
-        # PIPELINE.cuda()
-    SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
-    GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
-    AESTHETIC_CHECKER = ImageAestheticChecker()
-    CHECKERS = [GEO_CHECKER, SEG_CHECKER, AESTHETIC_CHECKER]
-    TMP_DIR = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "sessions/imageto3d"
-    )
-    os.makedirs(TMP_DIR, exist_ok=True)
-elif os.getenv("GRADIO_APP").startswith("textto3d"):
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
-    if "sam3d" in os.getenv("GRADIO_APP"):
-        PIPELINE = Sam3dInference(device="cuda")
-    else:
-        PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
-            "microsoft/TRELLIS-image-large"
-        )
-        # PIPELINE.cuda()
-    text_model_dir = "weights/Kolors"
-    PIPELINE_IMG_IP = build_text2img_ip_pipeline(text_model_dir, ref_scale=0.3)
-    PIPELINE_IMG = build_text2img_pipeline(text_model_dir)
     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
     GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
     AESTHETIC_CHECKER = ImageAestheticChecker()
     CHECKERS = [GEO_CHECKER, SEG_CHECKER, AESTHETIC_CHECKER]
-    TMP_DIR = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "sessions/textto3d"
-    )
     os.makedirs(TMP_DIR, exist_ok=True)
 elif os.getenv("GRADIO_APP") == "texture_edit":
     DELIGHT = DelightingModel()
     IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
-    PIPELINE_IP = build_texture_gen_pipe(
-        base_ckpt_dir="./weights",
-        ip_adapt_scale=0.7,
-        device="cuda",
-    )
-    PIPELINE = build_texture_gen_pipe(
-        base_ckpt_dir="./weights",
-        ip_adapt_scale=0,
-        device="cuda",
-    )
-    TMP_DIR = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "sessions/texture_edit"
-    )
     os.makedirs(TMP_DIR, exist_ok=True)
@@ -290,17 +300,18 @@ def image_to_3d(
         seg_image = Image.fromarray(seg_image)
     logger.info("Start generating 3D representation from image...")
-    if isinstance(PIPELINE, Sam3dInference):
-        outputs = PIPELINE.run(
             seg_image,
             seed=seed,
             stage1_inference_steps=ss_sampling_steps,
             stage2_inference_steps=slat_sampling_steps,
         )
     else:
-        PIPELINE.cuda()
         seg_image = trellis_preprocess(seg_image)
-        outputs = PIPELINE.run(
             seg_image,
             seed=seed,
             formats=["gaussian", "mesh"],
@@ -589,6 +600,7 @@ def text2image_fn(
         output_root = os.path.join(output_root, str(req.session_hash))
         os.makedirs(output_root, exist_ok=True)
     pipeline = PIPELINE_IMG if ip_image is None else PIPELINE_IMG_IP
     if ip_image is not None:
         pipeline.set_ip_adapter_scale([ip_adapt_scale])
@@ -656,7 +668,8 @@ def generate_texture_mvimages(
 ) -> list[str]:
     output_root = os.path.join(TMP_DIR, str(req.session_hash))
     use_ip_adapter = True if ip_img_path and ip_adapt_scale > 0 else False
-    PIPELINE_IP.set_ip_adapter_scale([ip_adapt_scale])
     img_save_paths = infer_pipe(
         index_file=f"{output_root}/condition/index.json",
         controlnet_cond_scale=controlnet_cond_scale,
@@ -669,7 +682,7 @@ def generate_texture_mvimages(
         prompt=prompt,
         save_dir=f"{output_root}/multi_view",
         sub_idxs=sub_idxs,
-        pipeline=PIPELINE_IP if use_ip_adapter else PIPELINE,
         seed=seed,
     )

 os.environ.setdefault("OPENAI_API_KEY", "sk-placeholder")
 MAX_SEED = 100000
+# Global pipeline placeholders
+PIPELINE = None
+PIPELINE_IMG_IP = None
+PIPELINE_IMG = None
+PIPELINE_IP = None
+PIPELINE_EDIT = None
+RBG_REMOVER = None
+RBG14_REMOVER = None
+SAM_PREDICTOR = None
+SEG_CHECKER = None
+GEO_CHECKER = None
+AESTHETIC_CHECKER = None
+CHECKERS = []
+def get_pipline():
+    global PIPELINE, PIPELINE_IMG_IP, PIPELINE_IMG
+    if PIPELINE is None:
+        if os.getenv("GRADIO_APP").startswith("imageto3d"):
+            if "sam3d" in os.getenv("GRADIO_APP"):
+                PIPELINE = Sam3dInference(device="cuda")
+            else:
+                PIPELINE = TrellisImageTo3DPipeline.from_pretrained("microsoft/TRELLIS-image-large")
+        elif os.getenv("GRADIO_APP").startswith("textto3d"):
+            if "sam3d" in os.getenv("GRADIO_APP"):
+                PIPELINE = Sam3dInference(device="cuda")
+            else:
+                PIPELINE = TrellisImageTo3DPipeline.from_pretrained("microsoft/TRELLIS-image-large")
+            text_model_dir = "weights/Kolors"
+            if PIPELINE_IMG_IP is None:
+                PIPELINE_IMG_IP = build_text2img_ip_pipeline(text_model_dir, ref_scale=0.3)
+            if PIPELINE_IMG is None:
+                PIPELINE_IMG = build_text2img_pipeline(text_model_dir)
+    return PIPELINE
+def get_texture_edit_pipeline():
+    global PIPELINE_IP, PIPELINE_EDIT
+    if PIPELINE_IP is None:
+        PIPELINE_IP = build_texture_gen_pipe(
+            base_ckpt_dir="./weights",
+            ip_adapt_scale=0.7,
+            device="cuda",
         )
+    if PIPELINE_EDIT is None:
+        PIPELINE_EDIT = build_texture_gen_pipe(
+            base_ckpt_dir="./weights",
+            ip_adapt_scale=0,
+            device="cuda",
+        )
+    return PIPELINE_IP, PIPELINE_EDIT
+# Initialize non-CUDA models
+if os.getenv("GRADIO_APP").startswith("imageto3d") or os.getenv("GRADIO_APP").startswith("textto3d"):
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
+    SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
     GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
     AESTHETIC_CHECKER = ImageAestheticChecker()
     CHECKERS = [GEO_CHECKER, SEG_CHECKER, AESTHETIC_CHECKER]
+    app_type = "imageto3d" if os.getenv("GRADIO_APP").startswith("imageto3d") else "textto3d"
+    TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), f"sessions/{app_type}")
     os.makedirs(TMP_DIR, exist_ok=True)
 elif os.getenv("GRADIO_APP") == "texture_edit":
     DELIGHT = DelightingModel()
     IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
+    TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "sessions/texture_edit")
     os.makedirs(TMP_DIR, exist_ok=True)
         seg_image = Image.fromarray(seg_image)
     logger.info("Start generating 3D representation from image...")
+    pipeline = get_pipline()
+    if isinstance(pipeline, Sam3dInference):
+        outputs = pipeline.run(
             seg_image,
             seed=seed,
             stage1_inference_steps=ss_sampling_steps,
             stage2_inference_steps=slat_sampling_steps,
         )
     else:
+        pipeline.cuda()
         seg_image = trellis_preprocess(seg_image)
+        outputs = pipeline.run(
             seg_image,
             seed=seed,
             formats=["gaussian", "mesh"],
         output_root = os.path.join(output_root, str(req.session_hash))
         os.makedirs(output_root, exist_ok=True)
+    _ = get_pipline()
     pipeline = PIPELINE_IMG if ip_image is None else PIPELINE_IMG_IP
     if ip_image is not None:
         pipeline.set_ip_adapter_scale([ip_adapt_scale])
 ) -> list[str]:
     output_root = os.path.join(TMP_DIR, str(req.session_hash))
     use_ip_adapter = True if ip_img_path and ip_adapt_scale > 0 else False
+    pipeline_ip, pipeline_edit = get_texture_edit_pipeline()
+    pipeline_ip.set_ip_adapter_scale([ip_adapt_scale])
     img_save_paths = infer_pipe(
         index_file=f"{output_root}/condition/index.json",
         controlnet_cond_scale=controlnet_cond_scale,
         prompt=prompt,
         save_dir=f"{output_root}/multi_view",
         sub_idxs=sub_idxs,
+        pipeline=pipeline_ip if use_ip_adapter else pipeline_edit,
         seed=seed,
     )