liamsch committed on
Commit
e34628e
·
1 Parent(s): 8b1eedf

move ffmpeg command out of GPU decorator

Browse files
Files changed (1) hide show
  1. gradio_demo.py +59 -47
gradio_demo.py CHANGED
@@ -144,60 +144,72 @@ def process_image(image: np.ndarray) -> Image.Image:
144
 
145
 
146
  @spaces.GPU
147
- def process_video(video_path: str, progress=gr.Progress()) -> str:
148
  """
149
- Process a video and return path to the rendered output video using background threads.
 
150
  """
151
  # Initialize models on first use (lazy loading for @spaces.GPU)
152
  initialize_models()
153
 
154
- temp_dir = Path(tempfile.mkdtemp())
155
  render_size = 512
156
- try:
157
- # Prepare dataset and dataloader
158
- dataset = VideoFrameDataset(video_path, fa_model)
159
- dataloader = DataLoader(dataset, batch_size=1, num_workers=0)
160
- fps = dataset.fps
161
- num_frames = len(dataset)
162
- # Prepare rendering thread and queue
163
- render_queue = Queue(maxsize=32)
164
- num_render_workers = 1
165
- rendering_threads = []
166
- for _ in range(num_render_workers):
167
- thread = RenderingThread(render_queue, temp_dir, flame.faces, c2w, render_size)
168
- thread.start()
169
- rendering_threads.append(thread)
170
- progress(0, desc="Processing video frames...")
171
- frame_idx = 0
172
- with torch.no_grad():
173
- for batch in dataloader:
174
- images = batch["image"].to(device)
175
- cropped_frames = batch["cropped_frame"]
176
- # Run inference
177
- predictions = sheap_model(images)
178
- verts = flame(
179
- shape=predictions["shape_from_facenet"],
180
- expression=predictions["expr"],
181
- pose=pose_components_to_rotmats(predictions),
182
- eyelids=predictions["eyelids"],
183
- translation=predictions["cam_trans"],
 
 
 
 
 
 
 
184
  )
185
- verts = verts.cpu()
186
- for i in range(images.shape[0]):
187
- cropped_frame = _tensor_to_numpy_image(cropped_frames[i])
188
- render_queue.put((frame_idx, cropped_frame, verts[i]))
189
- frame_idx += 1
190
- progress(
191
- frame_idx / num_frames, desc=f"Processing frame {frame_idx}/{num_frames}"
192
- )
193
- # Stop rendering threads
194
- for _ in range(num_render_workers):
195
- render_queue.put(None)
196
- for thread in rendering_threads:
197
- thread.join()
198
- if frame_idx == 0:
199
- raise ValueError("No frames were successfully processed!")
200
- # Create output video using ffmpeg
 
 
 
 
 
201
  progress(0.95, desc="Encoding video...")
202
  output_path = temp_dir / "output.mp4"
203
  ffmpeg_cmd = [
 
144
 
145
 
146
  @spaces.GPU
147
+ def process_video_frames(video_path: str, temp_dir: Path, progress=gr.Progress()):
148
  """
149
+ Process video frames with GPU (inference and rendering).
150
+ Returns fps and number of frames processed.
151
  """
152
  # Initialize models on first use (lazy loading for @spaces.GPU)
153
  initialize_models()
154
 
 
155
  render_size = 512
156
+ # Prepare dataset and dataloader
157
+ dataset = VideoFrameDataset(video_path, fa_model)
158
+ dataloader = DataLoader(dataset, batch_size=1, num_workers=0)
159
+ fps = dataset.fps
160
+ num_frames = len(dataset)
161
+ # Prepare rendering thread and queue
162
+ render_queue = Queue(maxsize=32)
163
+ num_render_workers = 1
164
+ rendering_threads = []
165
+ for _ in range(num_render_workers):
166
+ thread = RenderingThread(render_queue, temp_dir, flame.faces, c2w, render_size)
167
+ thread.start()
168
+ rendering_threads.append(thread)
169
+ progress(0, desc="Processing video frames...")
170
+ frame_idx = 0
171
+ with torch.no_grad():
172
+ for batch in dataloader:
173
+ images = batch["image"].to(device)
174
+ cropped_frames = batch["cropped_frame"]
175
+ # Run inference
176
+ predictions = sheap_model(images)
177
+ verts = flame(
178
+ shape=predictions["shape_from_facenet"],
179
+ expression=predictions["expr"],
180
+ pose=pose_components_to_rotmats(predictions),
181
+ eyelids=predictions["eyelids"],
182
+ translation=predictions["cam_trans"],
183
+ )
184
+ verts = verts.cpu()
185
+ for i in range(images.shape[0]):
186
+ cropped_frame = _tensor_to_numpy_image(cropped_frames[i])
187
+ render_queue.put((frame_idx, cropped_frame, verts[i]))
188
+ frame_idx += 1
189
+ progress(
190
+ frame_idx / num_frames, desc=f"Processing frame {frame_idx}/{num_frames}"
191
  )
192
+ # Stop rendering threads
193
+ for _ in range(num_render_workers):
194
+ render_queue.put(None)
195
+ for thread in rendering_threads:
196
+ thread.join()
197
+ if frame_idx == 0:
198
+ raise ValueError("No frames were successfully processed!")
199
+
200
+ return fps, frame_idx
201
+
202
+
203
+ def process_video(video_path: str, progress=gr.Progress()) -> str:
204
+ """
205
+ Process a video and return path to the rendered output video.
206
+ """
207
+ temp_dir = Path(tempfile.mkdtemp())
208
+ try:
209
+ # Process frames with GPU
210
+ fps, num_frames = process_video_frames(video_path, temp_dir, progress)
211
+
212
+ # Create output video using ffmpeg (CPU-only, outside GPU context)
213
  progress(0.95, desc="Encoding video...")
214
  output_path = temp_dir / "output.mp4"
215
  ffmpeg_cmd = [