xinjie.wang committed on
Commit
2c93ef4
·
1 Parent(s): 6bf795c
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🖼️
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 6.8.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
app.py CHANGED
@@ -19,9 +19,9 @@ import os
19
 
20
  # GRADIO_APP == "imageto3d_sam3d", sam3d object model, by default.
21
  # GRADIO_APP == "imageto3d", TRELLIS model.
22
- os.environ["GRADIO_APP"] = "imageto3d"
23
  from glob import glob
24
- # test
25
  import gradio as gr
26
  from app_style import custom_theme, image_css, lighting_css
27
  from common import (
@@ -362,6 +362,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
362
  inputs=image_prompt,
363
  outputs=generate_btn,
364
  )
 
365
  rmbg_tag.change(
366
  set_current_rmbg_tag,
367
  inputs=[rmbg_tag],
@@ -490,24 +491,23 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
490
  is_samimage,
491
  ],
492
  outputs=[output_buf, video_output],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  )
494
- # .success(
495
- # extract_3d_representations_v3,
496
- # inputs=[
497
- # output_buf,
498
- # project_delight,
499
- # texture_size,
500
- # ],
501
- # outputs=[
502
- # model_output_mesh,
503
- # model_output_gs,
504
- # model_output_obj,
505
- # aligned_gs,
506
- # ],
507
- # ).success(
508
- # lambda: gr.Button(interactive=True),
509
- # outputs=[extract_urdf_btn],
510
- # )
511
 
512
  extract_urdf_btn.click(
513
  extract_urdf,
 
19
 
20
  # GRADIO_APP == "imageto3d_sam3d", sam3d object model, by default.
21
  # GRADIO_APP == "imageto3d", TRELLIS model.
22
+ os.environ["GRADIO_APP"] = "imageto3d_sam3d"
23
  from glob import glob
24
+
25
  import gradio as gr
26
  from app_style import custom_theme, image_css, lighting_css
27
  from common import (
 
362
  inputs=image_prompt,
363
  outputs=generate_btn,
364
  )
365
+
366
  rmbg_tag.change(
367
  set_current_rmbg_tag,
368
  inputs=[rmbg_tag],
 
491
  is_samimage,
492
  ],
493
  outputs=[output_buf, video_output],
494
+ ).success(
495
+ extract_3d_representations_v3,
496
+ inputs=[
497
+ output_buf,
498
+ project_delight,
499
+ texture_size,
500
+ ],
501
+ outputs=[
502
+ model_output_mesh,
503
+ model_output_gs,
504
+ model_output_obj,
505
+ aligned_gs,
506
+ ],
507
+ ).success(
508
+ lambda: gr.Button(interactive=True),
509
+ outputs=[extract_urdf_btn],
510
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
512
  extract_urdf_btn.click(
513
  extract_urdf,
app_style.py CHANGED
@@ -20,7 +20,7 @@ from gradio.themes.utils.colors import gray, neutral, slate, stone, teal, zinc
20
  lighting_css = """
21
  <style>
22
  #lighter_mesh canvas {
23
- filter: brightness(1) !important;
24
  }
25
  </style>
26
  """
 
20
  lighting_css = """
21
  <style>
22
  #lighter_mesh canvas {
23
+ filter: brightness(2.3) !important;
24
  }
25
  </style>
26
  """
common.py CHANGED
@@ -263,7 +263,7 @@ def select_point(
263
  return (image, masks), seg_image
264
 
265
 
266
- @spaces.GPU(duration=30)
267
  def image_to_3d(
268
  image: Image.Image,
269
  seed: int,
@@ -276,13 +276,12 @@ def image_to_3d(
276
  is_sam_image: bool = False,
277
  req: gr.Request = None,
278
  ) -> tuple[dict, str]:
279
- print("step1", flush=True)
280
  if is_sam_image:
281
  seg_image = filter_image_small_connected_components(sam_image)
282
  seg_image = Image.fromarray(seg_image, mode="RGBA")
283
  else:
284
  seg_image = image
285
- print("step2", flush=True)
286
  if isinstance(seg_image, np.ndarray):
287
  seg_image = Image.fromarray(seg_image)
288
 
@@ -313,26 +312,24 @@ def image_to_3d(
313
  )
314
  # Set back to cpu for memory saving.
315
  PIPELINE.cpu()
316
- print("step3", flush=True)
317
  gs_model = outputs["gaussian"][0]
318
  mesh_model = outputs["mesh"][0]
319
- # color_images = render_video(gs_model, r=1.85)["color"]
320
- # normal_images = render_video(mesh_model, r=1.85)["normal"]
321
 
322
- # output_root = os.path.join(TMP_DIR, str(req.session_hash))
323
- # os.makedirs(output_root, exist_ok=True)
324
- # seg_image.save(f"{output_root}/seg_image.png")
325
- # raw_image_cache.save(f"{output_root}/raw_image.png")
326
 
327
- # video_path = os.path.join(output_root, "gs_mesh.mp4")
328
- # merge_images_video(color_images, normal_images, video_path)
329
  state = pack_state(gs_model, mesh_model)
330
-
331
- # gc.collect()
332
- # torch.cuda.empty_cache()
333
 
334
- video_path = None
335
- print("step4", flush=True)
 
336
  return state, video_path
337
 
338
 
@@ -567,7 +564,7 @@ def extract_urdf(
567
  )
568
 
569
 
570
- @spaces.GPU(duration=300)
571
  def text2image_fn(
572
  prompt: str,
573
  guidance_scale: float,
@@ -623,7 +620,7 @@ def text2image_fn(
623
  return save_paths + save_paths
624
 
625
 
626
- @spaces.GPU(duration=120)
627
  def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
628
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
629
 
@@ -639,7 +636,7 @@ def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
639
  return None, None, None
640
 
641
 
642
- @spaces.GPU(duration=300)
643
  def generate_texture_mvimages(
644
  prompt: str,
645
  controlnet_cond_scale: float = 0.55,
@@ -726,7 +723,7 @@ def backproject_texture(
726
  return output_glb_mesh, output_obj_mesh, zip_file
727
 
728
 
729
- @spaces.GPU(duration=300)
730
  def backproject_texture_v2(
731
  mesh_path: str,
732
  input_image: str,
@@ -773,7 +770,7 @@ def backproject_texture_v2(
773
  return output_glb_mesh, output_obj_mesh, zip_file
774
 
775
 
776
- @spaces.GPU(duration=120)
777
  def render_result_video(
778
  mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
779
  ) -> str:
 
263
  return (image, masks), seg_image
264
 
265
 
266
+ @spaces.GPU
267
  def image_to_3d(
268
  image: Image.Image,
269
  seed: int,
 
276
  is_sam_image: bool = False,
277
  req: gr.Request = None,
278
  ) -> tuple[dict, str]:
 
279
  if is_sam_image:
280
  seg_image = filter_image_small_connected_components(sam_image)
281
  seg_image = Image.fromarray(seg_image, mode="RGBA")
282
  else:
283
  seg_image = image
284
+
285
  if isinstance(seg_image, np.ndarray):
286
  seg_image = Image.fromarray(seg_image)
287
 
 
312
  )
313
  # Set back to cpu for memory saving.
314
  PIPELINE.cpu()
315
+
316
  gs_model = outputs["gaussian"][0]
317
  mesh_model = outputs["mesh"][0]
318
+ color_images = render_video(gs_model, r=1.85)["color"]
319
+ normal_images = render_video(mesh_model, r=1.85)["normal"]
320
 
321
+ output_root = os.path.join(TMP_DIR, str(req.session_hash))
322
+ os.makedirs(output_root, exist_ok=True)
323
+ seg_image.save(f"{output_root}/seg_image.png")
324
+ raw_image_cache.save(f"{output_root}/raw_image.png")
325
 
326
+ video_path = os.path.join(output_root, "gs_mesh.mp4")
327
+ merge_images_video(color_images, normal_images, video_path)
328
  state = pack_state(gs_model, mesh_model)
 
 
 
329
 
330
+ gc.collect()
331
+ torch.cuda.empty_cache()
332
+
333
  return state, video_path
334
 
335
 
 
564
  )
565
 
566
 
567
+ @spaces.GPU
568
  def text2image_fn(
569
  prompt: str,
570
  guidance_scale: float,
 
620
  return save_paths + save_paths
621
 
622
 
623
+ @spaces.GPU
624
  def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
625
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
626
 
 
636
  return None, None, None
637
 
638
 
639
+ @spaces.GPU
640
  def generate_texture_mvimages(
641
  prompt: str,
642
  controlnet_cond_scale: float = 0.55,
 
723
  return output_glb_mesh, output_obj_mesh, zip_file
724
 
725
 
726
+ @spaces.GPU
727
  def backproject_texture_v2(
728
  mesh_path: str,
729
  input_image: str,
 
770
  return output_glb_mesh, output_obj_mesh, zip_file
771
 
772
 
773
+ @spaces.GPU
774
  def render_result_video(
775
  mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
776
  ) -> str:
embodied_gen/data/backproject_v2.py CHANGED
@@ -596,7 +596,7 @@ class TextureBacker:
596
 
597
  return texture
598
 
599
- @spaces.GPU()
600
  def compute_texture(
601
  self,
602
  colors: list[Image.Image],
 
596
 
597
  return texture
598
 
599
+ @spaces.GPU
600
  def compute_texture(
601
  self,
602
  colors: list[Image.Image],
embodied_gen/data/backproject_v3.py CHANGED
@@ -425,7 +425,7 @@ def parse_args():
425
  return args
426
 
427
 
428
- @spaces.GPU()
429
  def entrypoint(
430
  delight_model: DelightingModel = None,
431
  imagesr_model: ImageRealESRGAN = None,
 
425
  return args
426
 
427
 
428
+ @spaces.GPU
429
  def entrypoint(
430
  delight_model: DelightingModel = None,
431
  imagesr_model: ImageRealESRGAN = None,
embodied_gen/data/mesh_operator.py CHANGED
@@ -412,7 +412,7 @@ class MeshFixer(object):
412
  dtype=torch.int32,
413
  )
414
 
415
- @spaces.GPU(duration=300)
416
  def __call__(
417
  self,
418
  filter_ratio: float,
 
412
  dtype=torch.int32,
413
  )
414
 
415
+ @spaces.GPU
416
  def __call__(
417
  self,
418
  filter_ratio: float,
embodied_gen/models/delight_model.py CHANGED
@@ -140,7 +140,7 @@ class DelightingModel(object):
140
 
141
  return new_image
142
 
143
- @spaces.GPU(duration=120)
144
  @torch.no_grad()
145
  def __call__(
146
  self,
 
140
 
141
  return new_image
142
 
143
+ @spaces.GPU
144
  @torch.no_grad()
145
  def __call__(
146
  self,
embodied_gen/models/sam3d.py CHANGED
@@ -51,6 +51,7 @@ class Sam3dInference:
51
  Args:
52
  local_dir (str): Directory to store or load model weights and configs.
53
  compile (bool): Whether to compile the model for faster inference.
 
54
 
55
  Methods:
56
  merge_mask_to_rgba(image, mask):
@@ -62,7 +63,7 @@ class Sam3dInference:
62
  """
63
 
64
  def __init__(
65
- self, local_dir: str = "weights/sam-3d-objects", compile: bool = False
66
  ) -> None:
67
  if not os.path.exists(local_dir):
68
  snapshot_download("facebook/sam-3d-objects", local_dir=local_dir)
@@ -78,6 +79,7 @@ class Sam3dInference:
78
  config["slat_decoder_gs_ckpt_path"] = config.pop(
79
  "slat_decoder_gs_4_ckpt_path", "slat_decoder_gs_4.ckpt"
80
  )
 
81
  self.pipeline: InferencePipelinePointMap = instantiate(config)
82
 
83
  def merge_mask_to_rgba(
 
51
  Args:
52
  local_dir (str): Directory to store or load model weights and configs.
53
  compile (bool): Whether to compile the model for faster inference.
54
+ device (str): Device to run the model on (e.g., "cuda" or "cpu").
55
 
56
  Methods:
57
  merge_mask_to_rgba(image, mask):
 
63
  """
64
 
65
  def __init__(
66
+ self, local_dir: str = "weights/sam-3d-objects", compile: bool = False, device: str = "cuda",
67
  ) -> None:
68
  if not os.path.exists(local_dir):
69
  snapshot_download("facebook/sam-3d-objects", local_dir=local_dir)
 
79
  config["slat_decoder_gs_ckpt_path"] = config.pop(
80
  "slat_decoder_gs_4_ckpt_path", "slat_decoder_gs_4.ckpt"
81
  )
82
+ config["device"] = device
83
  self.pipeline: InferencePipelinePointMap = instantiate(config)
84
 
85
  def merge_mask_to_rgba(
embodied_gen/models/segment_model.py CHANGED
@@ -373,7 +373,6 @@ class BMGG14Remover(object):
373
  "image-segmentation",
374
  model="briaai/RMBG-1.4",
375
  trust_remote_code=True,
376
- device="cuda",
377
  )
378
 
379
  def __call__(
 
373
  "image-segmentation",
374
  model="briaai/RMBG-1.4",
375
  trust_remote_code=True,
 
376
  )
377
 
378
  def __call__(
embodied_gen/models/sr_model.py CHANGED
@@ -80,7 +80,7 @@ class ImageStableSR:
80
  self.up_pipeline_x4.set_progress_bar_config(disable=True)
81
  # self.up_pipeline_x4.enable_model_cpu_offload()
82
 
83
- @spaces.GPU(duration=120)
84
  def __call__(
85
  self,
86
  image: Union[Image.Image, np.ndarray],
@@ -196,7 +196,7 @@ class ImageRealESRGAN:
196
  half=True,
197
  )
198
 
199
- @spaces.GPU(duration=120)
200
  def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
201
  """Performs super-resolution on the input image.
202
 
 
80
  self.up_pipeline_x4.set_progress_bar_config(disable=True)
81
  # self.up_pipeline_x4.enable_model_cpu_offload()
82
 
83
+ @spaces.GPU
84
  def __call__(
85
  self,
86
  image: Union[Image.Image, np.ndarray],
 
196
  half=True,
197
  )
198
 
199
+ @spaces.GPU
200
  def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
201
  """Performs super-resolution on the input image.
202
 
embodied_gen/scripts/render_gs.py CHANGED
@@ -96,7 +96,7 @@ def parse_args():
96
  return args
97
 
98
 
99
- @spaces.GPU(duration=120)
100
  def entrypoint(**kwargs) -> None:
101
  args = parse_args()
102
  for k, v in kwargs.items():
 
96
  return args
97
 
98
 
99
+ @spaces.GPU
100
  def entrypoint(**kwargs) -> None:
101
  args = parse_args()
102
  for k, v in kwargs.items():
embodied_gen/utils/monkey_patch/sam3d.py CHANGED
@@ -380,7 +380,7 @@ def monkey_patch_sam3d():
380
 
381
  InferencePipeline.__init__ = patch_init
382
 
383
- # patch_pointmap_infer_pipeline()
384
- # patch_infer_init()
385
 
386
  return
 
380
 
381
  InferencePipeline.__init__ = patch_init
382
 
383
+ patch_pointmap_infer_pipeline()
384
+ patch_infer_init()
385
 
386
  return
embodied_gen/utils/process_media.py CHANGED
@@ -53,7 +53,7 @@ __all__ = [
53
  ]
54
 
55
 
56
- @spaces.GPU(duration=120)
57
  def render_asset3d(
58
  mesh_path: str,
59
  output_root: str,
 
53
  ]
54
 
55
 
56
+ @spaces.GPU
57
  def render_asset3d(
58
  mesh_path: str,
59
  output_root: str,
embodied_gen/utils/trender.py CHANGED
@@ -43,7 +43,7 @@ __all__ = [
43
  ]
44
 
45
 
46
- @spaces.GPU(duration=120)
47
  def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
48
  renderer = MeshRenderer()
49
  renderer.rendering_options.resolution = options.get("resolution", 512)
@@ -66,7 +66,7 @@ def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
66
  return rets
67
 
68
 
69
- @spaces.GPU(duration=120)
70
  def render_gs_frames(
71
  sample,
72
  extrinsics,
@@ -117,7 +117,7 @@ def render_gs_frames(
117
  return dict(outputs)
118
 
119
 
120
- @spaces.GPU(duration=120)
121
  def render_video(
122
  sample,
123
  resolution=512,
@@ -149,7 +149,7 @@ def render_video(
149
  return result
150
 
151
 
152
- @spaces.GPU(duration=120)
153
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
154
  return {
155
  "gaussian": {
 
43
  ]
44
 
45
 
46
+ @spaces.GPU
47
  def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
48
  renderer = MeshRenderer()
49
  renderer.rendering_options.resolution = options.get("resolution", 512)
 
66
  return rets
67
 
68
 
69
+ @spaces.GPU
70
  def render_gs_frames(
71
  sample,
72
  extrinsics,
 
117
  return dict(outputs)
118
 
119
 
120
+ @spaces.GPU
121
  def render_video(
122
  sample,
123
  resolution=512,
 
149
  return result
150
 
151
 
152
+ @spaces.GPU
153
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
154
  return {
155
  "gaussian": {
requirements.txt CHANGED
@@ -20,9 +20,9 @@ igraph==0.11.8
20
  pyvista==0.36.1
21
  openai==1.58.1
22
  transformers==4.42.4
23
- gradio==6.8.0
24
  sentencepiece==0.2.0
25
- diffusers==0.34.0
26
  xatlas==0.0.9
27
  onnxruntime==1.20.1
28
  tenacity==8.2.2
 
20
  pyvista==0.36.1
21
  openai==1.58.1
22
  transformers==4.42.4
23
+ gradio==5.12.0
24
  sentencepiece==0.2.0
25
+ diffusers==0.31.0
26
  xatlas==0.0.9
27
  onnxruntime==1.20.1
28
  tenacity==8.2.2