notenoughram commited on
Commit
2618acd
·
verified ·
1 Parent(s): 7136696

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -18
app.py CHANGED
@@ -5,13 +5,13 @@ import gc
5
  import shutil
6
  from typing import *
7
 
8
- # [AUTO-INSTALL] accelerate
9
  try:
10
  import accelerate
11
  except ImportError:
12
  subprocess.check_call([sys.executable, "-m", "pip", "install", "accelerate"])
13
 
14
- # [중요] OOM 방지
15
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
16
  os.environ['SPCONV_ALGO'] = 'native'
17
 
@@ -128,6 +128,7 @@ def generate_and_extract_glb(
128
  image_files = [image[0] for image in multiimages]
129
 
130
  try:
 
131
  with torch.no_grad():
132
  outputs, _, _ = pipeline.run(
133
  image=image_files,
@@ -146,6 +147,7 @@ def generate_and_extract_glb(
146
  )
147
  except Exception as e:
148
  torch.cuda.empty_cache()
 
149
  raise RuntimeError(f"Generation Failed: {str(e)}")
150
 
151
  video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
@@ -217,7 +219,7 @@ demo = gr.Blocks(
217
  """
218
  )
219
  with demo:
220
- gr.Markdown("# 💻 ReconViaGen (Device Mismatch Fixed)")
221
 
222
  with gr.Row():
223
  with gr.Column():
@@ -303,37 +305,47 @@ with demo:
303
  # Launch Script
304
  if __name__ == "__main__":
305
  print("🚀 Initializing Pipeline...")
 
306
  pipeline = TrellisVGGTTo3DPipeline.from_pretrained("esther11/trellis-vggt-v0-2")
307
 
308
- # 1. 모든 컴포넌트를 일단 CUDA:0으로 이동 (기본 상태 일치)
309
- pipeline.cuda()
310
-
311
- # [핵심 수정] 파이프라인의 device 정보를 명시적으로 수정하여 입력 텐서가 GPU로 생성되게 함
312
- pipeline._device = torch.device("cuda:0")
313
-
314
- # birefnet๋„ ํ™•์‹คํžˆ GPU0์— ์žˆ๋Š”์ง€ ํ™•์ธ (์ด๋ฏธ cuda()๋กœ ๊ฐ”๊ฒ ์ง€๋งŒ ์•ˆ์ „์žฅ์น˜)
315
- pipeline.birefnet_model = pipeline.birefnet_model.to("cuda:0")
316
 
317
  gpu_count = torch.cuda.device_count()
318
  print(f"⚡ Detected {gpu_count} GPUs.")
319
 
320
  if gpu_count > 1:
321
- print("⚡ Multi-GPU Mode: Splitting VGGT model.")
322
 
323
- # VGGT 분산을 위해 잠시 CPU로 이동하여 맵 계산 (안전한 분할을 위함)
 
324
  pipeline.VGGT_model.cpu()
325
 
326
- print(" - Calculating Device Map for VGGT...")
327
- # 메모리 제한을 두어 강제로 분산 유도 (Block 단위 보호)
 
 
 
 
 
 
 
 
328
  device_map = infer_auto_device_map(
329
  pipeline.VGGT_model,
330
- max_memory={i: "10GiB" for i in range(gpu_count)},
331
  no_split_module_classes=["Block", "ResnetBlock"]
332
  )
333
 
 
334
  pipeline.VGGT_model = dispatch_model(pipeline.VGGT_model, device_map=device_map)
335
- print("✅ VGGT Model dispatched.")
 
 
 
 
336
  else:
337
- print("⚠️ Warning: Only 1 GPU detected.")
338
 
339
  demo.launch()
 
5
  import shutil
6
  from typing import *
7
 
8
+ # [AUTO-INSTALL] accelerate ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ
9
  try:
10
  import accelerate
11
  except ImportError:
12
  subprocess.check_call([sys.executable, "-m", "pip", "install", "accelerate"])
13
 
14
+ # [중요] OOM 방지를 위한 메모리 파편화 설정
15
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
16
  os.environ['SPCONV_ALGO'] = 'native'
17
 
 
128
  image_files = [image[0] for image in multiimages]
129
 
130
  try:
131
+ # [중요] 추론 시 그래디언트 계산 끔 (메모리 절약)
132
  with torch.no_grad():
133
  outputs, _, _ = pipeline.run(
134
  image=image_files,
 
147
  )
148
  except Exception as e:
149
  torch.cuda.empty_cache()
150
+ # 구체적인 에러 메시지 반환
151
  raise RuntimeError(f"Generation Failed: {str(e)}")
152
 
153
  video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
 
219
  """
220
  )
221
  with demo:
222
+ gr.Markdown("# 💻 ReconViaGen (GPU 0 Freed)")
223
 
224
  with gr.Row():
225
  with gr.Column():
 
305
  # Launch Script
306
  if __name__ == "__main__":
307
  print("🚀 Initializing Pipeline...")
308
+ # 1. Pipeline 로드
309
  pipeline = TrellisVGGTTo3DPipeline.from_pretrained("esther11/trellis-vggt-v0-2")
310
 
311
+ # 2. 모든 모델을 일단 CUDA:0에 올려서 기본 설정(device mismatch 방지)을 완료함
312
+ pipeline.cuda()
313
+ pipeline._device = torch.device("cuda:0") # 내부 device 속성 고정
 
 
 
 
 
314
 
315
  gpu_count = torch.cuda.device_count()
316
  print(f"⚡ Detected {gpu_count} GPUs.")
317
 
318
  if gpu_count > 1:
319
+ print("⚡ Multi-GPU Mode: Offloading VGGT from GPU 0.")
320
 
321
+ # [핵심 로직] GPU 0을 비우기 위한 전략
322
+ # VGGT 모델을 잠시 CPU로 내립니다.
323
  pipeline.VGGT_model.cpu()
324
 
325
+ print(" - Calculating Device Map (Banning GPU 0 for VGGT)...")
326
+
327
+ # max_memory 설정:
328
+ # GPU 0: "10MiB" (사실상 VGGT 모델 적재 금지)
329
+ # GPU 1~N: "20GiB" (여유롭게 할당)
330
+ max_mem = {0: "10MiB"}
331
+ for i in range(1, gpu_count):
332
+ max_mem[i] = "20GiB"
333
+
334
+ # 이 설정으로 맵을 짜면 accelerate는 GPU 0을 건너뛰고 GPU 1부터 모델을 채웁니다.
335
  device_map = infer_auto_device_map(
336
  pipeline.VGGT_model,
337
+ max_memory=max_mem,
338
  no_split_module_classes=["Block", "ResnetBlock"]
339
  )
340
 
341
+ # 맵 적용하여 분산 로드
342
  pipeline.VGGT_model = dispatch_model(pipeline.VGGT_model, device_map=device_map)
343
+
344
+ print("✅ VGGT Model successfully pushed to GPU 1+.")
345
+ print(" - GPU 0: Birefnet (Preprocessing) + Controller")
346
+ print(" - GPU 1+: VGGT (Inference)")
347
+
348
  else:
349
+ print("⚠️ Warning: Only 1 GPU detected. Expect OOM if VRAM < 24GB.")
350
 
351
  demo.launch()