MatAnyone

Running on Zero

App Files Community

ysdede committed on Mar 9

Commit

69a1365

1 Parent(s): 315f517

Fix ZeroGPU pickle error: extract gr.SelectData coords before GPU call

Browse files

Files changed (1) hide show

hugging_face/app.py +22 -8

hugging_face/app.py CHANGED Viewed

@@ -201,29 +201,35 @@ def get_end_number(track_pause_number_slider, video_state, interactive_state):
     return video_state["painted_images"][track_pause_number_slider],interactive_state
 # use sam to get the mask
 @spaces.GPU(duration=60)
-def sam_refine(video_state, point_prompt, click_state, interactive_state, evt:gr.SelectData):
     """
     Args:
-        template_frame: PIL.Image
-        point_prompt: flag for positive or negative button click
         click_state: [[points], [labels]]
     """
     if point_prompt == "Positive":
-        coordinate = "[[{},{},1]]".format(evt.index[0], evt.index[1])
         interactive_state["positive_click_times"] += 1
     else:
-        coordinate = "[[{},{},0]]".format(evt.index[0], evt.index[1])
         interactive_state["negative_click_times"] += 1
     # prompt for sam model
     ensure_sam_on_cuda()
     model.samcontroler.sam_controler.reset_image()
     model.samcontroler.sam_controler.set_image(video_state["origin_images"][video_state["select_frame_number"]])
     prompt = get_prompt(click_state=click_state, click_input=coordinate)
-    mask, logit, painted_image = model.first_frame_click(
-                                                      image=video_state["origin_images"][video_state["select_frame_number"]],
                                                       points=np.array(prompt["input_point"]),
                                                       labels=np.array(prompt["input_label"]),
                                                       multimask=prompt["multimask_output"],
@@ -234,6 +240,14 @@ def sam_refine(video_state, point_prompt, click_state, interactive_state, evt:gr
     return painted_image, video_state, interactive_state
 def add_multi_mask(video_state, interactive_state, mask_dropdown):
     mask = video_state["masks"][video_state["select_frame_number"]]
     interactive_state["multi_mask"]["masks"].append(mask)

     return video_state["painted_images"][track_pause_number_slider],interactive_state
 # use sam to get the mask
+# ZeroGPU: gr.SelectData cannot be pickled (contains lambdas from Gradio's State.__init__).
+# We split into an outer wrapper that extracts plain data from the event,
+# and an inner @spaces.GPU function that receives only picklable arguments.
 @spaces.GPU(duration=60)
+def _sam_refine_gpu(video_state, point_prompt, click_state, interactive_state, click_x, click_y):
     """
+    Inner GPU function for SAM refinement.
     Args:
+        video_state: dict with video/image data
+        point_prompt: "Positive" or "Negative"
         click_state: [[points], [labels]]
+        interactive_state: dict with interaction state
+        click_x, click_y: integer pixel coordinates extracted from gr.SelectData
     """
     if point_prompt == "Positive":
+        coordinate = "[[{},{},1]]".format(click_x, click_y)
         interactive_state["positive_click_times"] += 1
     else:
+        coordinate = "[[{},{},0]]".format(click_x, click_y)
         interactive_state["negative_click_times"] += 1
     # prompt for sam model
     ensure_sam_on_cuda()
     model.samcontroler.sam_controler.reset_image()
     model.samcontroler.sam_controler.set_image(video_state["origin_images"][video_state["select_frame_number"]])
     prompt = get_prompt(click_state=click_state, click_input=coordinate)
+    mask, logit, painted_image = model.first_frame_click(
+                                                      image=video_state["origin_images"][video_state["select_frame_number"]],
                                                       points=np.array(prompt["input_point"]),
                                                       labels=np.array(prompt["input_label"]),
                                                       multimask=prompt["multimask_output"],
     return painted_image, video_state, interactive_state
+def sam_refine(video_state, point_prompt, click_state, interactive_state, evt: gr.SelectData):
+    """
+    Outer wrapper: extracts plain picklable coordinates from gr.SelectData,
+    then delegates to the @spaces.GPU inner function.
+    """
+    click_x, click_y = int(evt.index[0]), int(evt.index[1])
+    return _sam_refine_gpu(video_state, point_prompt, click_state, interactive_state, click_x, click_y)
 def add_multi_mask(video_state, interactive_state, mask_dropdown):
     mask = video_state["masks"][video_state["select_frame_number"]]
     interactive_state["multi_mask"]["masks"].append(mask)