John Ho
committed on
Commit
·
579e65b
1
Parent(s):
0db2411
added new variable for reference_frame_idx
Browse files- app.py +14 -2
- samv2_handler.py +5 -1
app.py
CHANGED
|
@@ -116,11 +116,17 @@ def process_image(
|
|
| 116 |
)
|
| 117 |
|
| 118 |
|
| 119 |
-
@spaces.GPU(
|
|
|
|
|
|
|
| 120 |
@torch.inference_mode()
|
| 121 |
@torch.autocast(device_type="cuda", dtype=torch.bfloat16)
|
| 122 |
def process_video(
|
| 123 |
-
video_path: str,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
):
|
| 125 |
"""
|
| 126 |
SAM2 Video Segmentation
|
|
@@ -148,6 +154,7 @@ def process_video(
|
|
| 148 |
do_tidy_up=True,
|
| 149 |
drop_mask=drop_masks,
|
| 150 |
async_frame_load=True,
|
|
|
|
| 151 |
)
|
| 152 |
|
| 153 |
|
|
@@ -196,6 +203,11 @@ with gr.Blocks() as demo:
|
|
| 196 |
""",
|
| 197 |
),
|
| 198 |
gr.Checkbox(label="remove base64 encoded masks from result JSON"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
],
|
| 200 |
outputs=gr.JSON(label="Output JSON"),
|
| 201 |
title="SAM2 for Videos",
|
|
|
|
| 116 |
)
|
| 117 |
|
| 118 |
|
| 119 |
+
@spaces.GPU(
|
| 120 |
+
duration=120
|
| 121 |
+
) # user must have 2-minute of inference time left at the time of calling
|
| 122 |
@torch.inference_mode()
|
| 123 |
@torch.autocast(device_type="cuda", dtype=torch.bfloat16)
|
| 124 |
def process_video(
|
| 125 |
+
video_path: str,
|
| 126 |
+
variant: str,
|
| 127 |
+
masks: Union[list, str],
|
| 128 |
+
drop_masks: bool = False,
|
| 129 |
+
ref_frame_idx: int = 0,
|
| 130 |
):
|
| 131 |
"""
|
| 132 |
SAM2 Video Segmentation
|
|
|
|
| 154 |
do_tidy_up=True,
|
| 155 |
drop_mask=drop_masks,
|
| 156 |
async_frame_load=True,
|
| 157 |
+
ref_frame_idx=ref_frame_idx,
|
| 158 |
)
|
| 159 |
|
| 160 |
|
|
|
|
| 203 |
""",
|
| 204 |
),
|
| 205 |
gr.Checkbox(label="remove base64 encoded masks from result JSON"),
|
| 206 |
+
gr.Number(
|
| 207 |
+
label="frame index for the provided object masks",
|
| 208 |
+
value=0,
|
| 209 |
+
precision=0,
|
| 210 |
+
),
|
| 211 |
],
|
| 212 |
outputs=gr.JSON(label="Output JSON"),
|
| 213 |
title="SAM2 for Videos",
|
samv2_handler.py
CHANGED
|
@@ -161,6 +161,7 @@ def run_sam_video_inference(
|
|
| 161 |
do_tidy_up: bool = False,
|
| 162 |
drop_mask: bool = True,
|
| 163 |
async_frame_load: bool = False,
|
|
|
|
| 164 |
):
|
| 165 |
# put video frames into directory
|
| 166 |
# TODO:
|
|
@@ -183,7 +184,10 @@ def run_sam_video_inference(
|
|
| 183 |
)
|
| 184 |
for i, mask in enumerate(masks):
|
| 185 |
model.add_new_mask(
|
| 186 |
-
inference_state=inference_state,
|
|
|
|
|
|
|
|
|
|
| 187 |
)
|
| 188 |
masks_generator = model.propagate_in_video(inference_state)
|
| 189 |
|
|
|
|
| 161 |
do_tidy_up: bool = False,
|
| 162 |
drop_mask: bool = True,
|
| 163 |
async_frame_load: bool = False,
|
| 164 |
+
ref_frame_idx: int = 0,
|
| 165 |
):
|
| 166 |
# put video frames into directory
|
| 167 |
# TODO:
|
|
|
|
| 184 |
)
|
| 185 |
for i, mask in enumerate(masks):
|
| 186 |
model.add_new_mask(
|
| 187 |
+
inference_state=inference_state,
|
| 188 |
+
frame_idx=ref_frame_idx,
|
| 189 |
+
obj_id=i,
|
| 190 |
+
mask=mask,
|
| 191 |
)
|
| 192 |
masks_generator = model.propagate_in_video(inference_state)
|
| 193 |
|