wlyu-adobe committed on
Commit
540ef6e
·
1 Parent(s): 5ca8dc1

Add CUDA memory management and reduce resolution for ZeroGPU compatibility

Browse files
Files changed (1) hide show
  1. app.py +19 -6
app.py CHANGED
@@ -93,7 +93,7 @@ class FaceLiftPipeline:
93
 
94
  # Parameters
95
  self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
96
- self.image_size = 512
97
  self.camera_indices = [2, 1, 0, 5, 4, 3]
98
 
99
  # Load models (keep on CPU for ZeroGPU compatibility)
@@ -147,8 +147,10 @@ class FaceLiftPipeline:
147
  self.mvdiffusion_pipeline.to(self.device)
148
  self.mvdiffusion_pipeline.unet.enable_xformers_memory_efficient_attention()
149
  self.gs_lrm_model.to(self.device)
 
150
  self.color_prompt_embedding = self.color_prompt_embedding.to(self.device)
151
  self._models_on_gpu = True
 
152
  print("Models on GPU, xformers enabled!")
153
 
154
  @spaces.GPU(duration=120)
@@ -198,6 +200,9 @@ class FaceLiftPipeline:
198
  multiview_path = output_dir / "multiview.png"
199
  multiview_image.save(multiview_path)
200
 
 
 
 
201
  # Prepare 3D reconstruction input
202
  view_arrays = [np.array(view) for view in selected_views]
203
  lrm_input = torch.from_numpy(np.stack(view_arrays, axis=0)).float()
@@ -227,12 +232,15 @@ class FaceLiftPipeline:
227
  })
228
 
229
  # Run 3D reconstruction
230
- with torch.autocast(enabled=True, device_type="cuda", dtype=torch.float16):
231
  result = self.gs_lrm_model.forward(batch, create_visual=False, split_data=True)
232
 
233
  comp_image = result.render[0].unsqueeze(0).detach()
234
  gaussians = result.gaussians[0]
235
 
 
 
 
236
  # Save filtered gaussians
237
  filtered_gaussians = gaussians.apply_all_filters(
238
  cam_origins=None,
@@ -252,15 +260,20 @@ class FaceLiftPipeline:
252
  output_path = output_dir / "output.png"
253
  Image.fromarray(comp_image).save(output_path)
254
 
255
- # Generate turntable video
256
- turntable_frames = render_turntable(gaussians, rendering_resolution=self.image_size,
257
- num_views=180)
258
- turntable_frames = rearrange(turntable_frames, "h (v w) c -> v h w c", v=180)
 
 
259
  turntable_frames = np.ascontiguousarray(turntable_frames)
260
 
261
  turntable_path = output_dir / "turntable.mp4"
262
  imageseq2video(turntable_frames, str(turntable_path), fps=30)
263
 
 
 
 
264
  return str(input_path), str(multiview_path), str(output_path), \
265
  str(turntable_path), str(ply_path)
266
 
 
93
 
94
  # Parameters
95
  self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
96
+ self.image_size = 384 # Reduced from 512 for ZeroGPU memory constraints
97
  self.camera_indices = [2, 1, 0, 5, 4, 3]
98
 
99
  # Load models (keep on CPU for ZeroGPU compatibility)
 
147
  self.mvdiffusion_pipeline.to(self.device)
148
  self.mvdiffusion_pipeline.unet.enable_xformers_memory_efficient_attention()
149
  self.gs_lrm_model.to(self.device)
150
+ self.gs_lrm_model.eval() # Set to eval mode
151
  self.color_prompt_embedding = self.color_prompt_embedding.to(self.device)
152
  self._models_on_gpu = True
153
+ torch.cuda.empty_cache() # Clear cache after moving models
154
  print("Models on GPU, xformers enabled!")
155
 
156
  @spaces.GPU(duration=120)
 
200
  multiview_path = output_dir / "multiview.png"
201
  multiview_image.save(multiview_path)
202
 
203
+ # Clear CUDA cache after diffusion to free memory
204
+ torch.cuda.empty_cache()
205
+
206
  # Prepare 3D reconstruction input
207
  view_arrays = [np.array(view) for view in selected_views]
208
  lrm_input = torch.from_numpy(np.stack(view_arrays, axis=0)).float()
 
232
  })
233
 
234
  # Run 3D reconstruction
235
+ with torch.no_grad(), torch.autocast(enabled=True, device_type="cuda", dtype=torch.float16):
236
  result = self.gs_lrm_model.forward(batch, create_visual=False, split_data=True)
237
 
238
  comp_image = result.render[0].unsqueeze(0).detach()
239
  gaussians = result.gaussians[0]
240
 
241
+ # Clear CUDA cache after reconstruction
242
+ torch.cuda.empty_cache()
243
+
244
  # Save filtered gaussians
245
  filtered_gaussians = gaussians.apply_all_filters(
246
  cam_origins=None,
 
260
  output_path = output_dir / "output.png"
261
  Image.fromarray(comp_image).save(output_path)
262
 
263
+ # Generate turntable video (reduced resolution and frames for ZeroGPU memory limits)
264
+ turntable_resolution = 256 # Lower resolution for turntable to save memory
265
+ num_turntable_views = 120 # Reduced from 180
266
+ turntable_frames = render_turntable(gaussians, rendering_resolution=turntable_resolution,
267
+ num_views=num_turntable_views)
268
+ turntable_frames = rearrange(turntable_frames, "h (v w) c -> v h w c", v=num_turntable_views)
269
  turntable_frames = np.ascontiguousarray(turntable_frames)
270
 
271
  turntable_path = output_dir / "turntable.mp4"
272
  imageseq2video(turntable_frames, str(turntable_path), fps=30)
273
 
274
+ # Final CUDA cache clear
275
+ torch.cuda.empty_cache()
276
+
277
  return str(input_path), str(multiview_path), str(output_path), \
278
  str(turntable_path), str(ply_path)
279