text-to-3d-2.0

Paused

App Files Files Community

jbilcke-hf committed on Jan 24

Commit

751171e

verified ·

1 Parent(s): 2728300

Update gradio_app.py

Browse files

Files changed (1) hide show

gradio_app.py +58 -23

gradio_app.py CHANGED Viewed

@@ -86,8 +86,8 @@ def create_batch(input_image: Image.Image) -> dict[str, Any]:
     print("[debug] rgb_cond shape:", rgb_cond.shape)
     # Permute the tensors to match the expected shape [B, C, H, W]
-    rgb_cond = rgb_cond.permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
-    mask = mask.permute(2, 0, 1).unsqueeze(0)  # [1, 1, H, W]
     print("[debug] rgb_cond after permute shape:", rgb_cond.shape)
     print("[debug] mask after permute shape:", mask.shape)
@@ -106,12 +106,53 @@ def create_batch(input_image: Image.Image) -> dict[str, Any]:
     return batch
 def generate_and_process_3d(prompt: str, seed: int = 42, width: int = 1024, height: int = 1024) -> tuple[str | None, Image.Image | None]:
     """Generate image from prompt and convert to 3D model."""
     try:
         # Generate image using FLUX
         generator = torch.Generator(device=device).manual_seed(seed)
         print("[debug] generating the image using Flux")
         generated_image = flux_pipe(
             prompt=prompt,
@@ -138,7 +179,7 @@ def generate_and_process_3d(prompt: str, seed: int = 42, width: int = 1024, heig
         print("[debug] creating the RGBA image using create_rgba_image(rgb_image, mask)")
         rgba_image = create_rgba_image(rgb_image, mask)
-        print(f"[debug] auto-cropping the rgba_image using spar3d_utils.foreground_crop(...)")
         processed_image = spar3d_utils.foreground_crop(
             rgba_image,
             crop_ratio=1.3,
@@ -146,33 +187,25 @@ def generate_and_process_3d(prompt: str, seed: int = 42, width: int = 1024, heig
             no_crop=False
         )
-        # Forward pass through SPAR3D
         print("[debug] preparing the batch by calling create_batch(processed_image)")
         batch = create_batch(processed_image)
         batch = {k: v.to(device) for k, v in batch.items()}
         # Generate mesh
         with torch.no_grad():
             print("[debug] calling torch.autocast(....) to generate the mesh")
             with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu', dtype=torch.bfloat16):
-                # Add point cloud conditioning to match expected input
-                if "pc_cond" not in batch:
-                    # Sample tokens from model's diffusion process
-                    cond_tokens = spar3d_model.forward_pdiff_cond(batch)
-                    sample_iter = spar3d_model.sampler.sample_batch_progressive(
-                        1,  # batch size
-                        cond_tokens,
-                        guidance_scale=3.0,
-                        device=device,
-                    )
-                    for x in sample_iter:
-                        samples = x["xstart"]
-                    # Add point cloud to batch
-                    batch["pc_cond"] = samples.permute(0, 2, 1).float()
-                    batch["pc_cond"] = spar3d_utils.normalize_pc_bbox(batch["pc_cond"])
-                    # Subsample to 512 points
-                    batch["pc_cond"] = batch["pc_cond"][:, torch.randperm(batch["pc_cond"].shape[1])[:512]]
                 trimesh_mesh, _ = spar3d_model.generate_mesh(
                     batch,
                     1024,  # texture_resolution
@@ -194,6 +227,8 @@ def generate_and_process_3d(prompt: str, seed: int = 42, width: int = 1024, heig
     except Exception as e:
         print(f"Error during generation: {str(e)}")
         return None, None
 # Create Gradio interface

     print("[debug] rgb_cond shape:", rgb_cond.shape)
     # Permute the tensors to match the expected shape [B, C, H, W]
+    rgb_cond = torch.movedim(rgb_cond, 2, 0).unsqueeze(0)  # [1, 3, H, W]
+    mask = torch.movedim(mask, 2, 0).unsqueeze(0)  # [1, 1, H, W]
     print("[debug] rgb_cond after permute shape:", rgb_cond.shape)
     print("[debug] mask after permute shape:", mask.shape)
     return batch
+def forward_model(batch, system, guidance_scale=3.0, seed=0, device="cuda"):
+    """Sample a conditioning point cloud for ``batch`` from ``system``.
+
+    Args:
+        batch: Mapping that contains at least ``"rgb_cond"``; the size of its
+            first dimension is used as the batch size for sampling.
+        system: Model exposing ``forward_pdiff_cond`` and
+            ``sampler.sample_batch_progressive``.
+        guidance_scale: Guidance scale forwarded to the sampler.
+        seed: Seed for the generator that selects the point subsample, so the
+            same seed always keeps the same point indices.
+        device: Device forwarded to the sampler.
+
+    Returns:
+        The normalized point cloud, subsampled to at most 512 points along
+        dimension 1.
+
+    Raises:
+        RuntimeError: If the sampler yields no samples.
+    """
+    print("[debug] Starting forward_model")
+    batch_size = batch["rgb_cond"].shape[0]
+    # Inference only: the caller invokes this outside its own no_grad block,
+    # so disable gradient tracking here to avoid building an autograd graph.
+    with torch.no_grad():
+        # Generate point cloud tokens
+        print("[debug] Generating point cloud tokens")
+        cond_tokens = system.forward_pdiff_cond(batch)
+        print("[debug] cond_tokens shape:", cond_tokens.shape)
+        # Sample points
+        print("[debug] Sampling points")
+        sample_iter = system.sampler.sample_batch_progressive(
+            batch_size,
+            cond_tokens,
+            guidance_scale=guidance_scale,
+            device=device
+        )
+        # Only the last progressive step is kept as the final sample
+        samples = None
+        for x in sample_iter:
+            samples = x["xstart"]
+        if samples is None:
+            # Fail with a clear message instead of an UnboundLocalError
+            raise RuntimeError("point cloud sampler produced no samples")
+        print("[debug] samples shape before permute:", samples.shape)
+        # Convert samples to point cloud format
+        pc_cond = samples.permute(0, 2, 1).float()
+        print("[debug] pc_cond shape after permute:", pc_cond.shape)
+        # Normalize point cloud
+        pc_cond = spar3d_utils.normalize_pc_bbox(pc_cond)
+        print("[debug] pc_cond shape after normalize:", pc_cond.shape)
+        # Subsample to 512 points, using a dedicated generator so the choice
+        # depends on `seed` rather than on the global RNG state
+        subsample_rng = torch.Generator().manual_seed(seed)
+        keep = torch.randperm(pc_cond.shape[1], generator=subsample_rng)[:512]
+        pc_cond = pc_cond[:, keep]
+        print("[debug] pc_cond final shape:", pc_cond.shape)
+    return pc_cond
 def generate_and_process_3d(prompt: str, seed: int = 42, width: int = 1024, height: int = 1024) -> tuple[str | None, Image.Image | None]:
     """Generate image from prompt and convert to 3D model."""
     try:
+        # Set random seeds
+        torch.manual_seed(seed)
+        np.random.seed(seed)
         # Generate image using FLUX
         generator = torch.Generator(device=device).manual_seed(seed)
         print("[debug] generating the image using Flux")
         generated_image = flux_pipe(
             prompt=prompt,
         print("[debug] creating the RGBA image using create_rgba_image(rgb_image, mask)")
         rgba_image = create_rgba_image(rgb_image, mask)
+        print("[debug] auto-cropping the rgba_image using spar3d_utils.foreground_crop(...)")
         processed_image = spar3d_utils.foreground_crop(
             rgba_image,
             crop_ratio=1.3,
             no_crop=False
         )
+        # Prepare batch for processing
         print("[debug] preparing the batch by calling create_batch(processed_image)")
         batch = create_batch(processed_image)
         batch = {k: v.to(device) for k, v in batch.items()}
+        # Generate point cloud
+        pc_cond = forward_model(
+            batch,
+            spar3d_model,
+            guidance_scale=3.0,
+            seed=seed,
+            device=device
+        )
+        batch["pc_cond"] = pc_cond
         # Generate mesh
         with torch.no_grad():
             print("[debug] calling torch.autocast(....) to generate the mesh")
             with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu', dtype=torch.bfloat16):
                 trimesh_mesh, _ = spar3d_model.generate_mesh(
                     batch,
                     1024,  # texture_resolution
     except Exception as e:
         print(f"Error during generation: {str(e)}")
+        import traceback
+        traceback.print_exc()
         return None, None
 # Create Gradio interface