sam2-playground

Runtime error

App Files Files Community

jhj0517 committed on Aug 28, 2024

Commit

ed7d6af

2 Parent(s): 76d1f6d 84188d4

Merge branch 'master' into huggingface

Browse files

Files changed (4) hide show

app.py +6 -5
configs/default_hparams.yaml +1 -0
modules/mask_utils.py +6 -1
modules/sam_inference.py +30 -3

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ class App:
         self.image_modes = [AUTOMATIC_MODE, BOX_PROMPT_MODE]
         self.default_mode = BOX_PROMPT_MODE
         self.filter_modes = [PIXELIZE_FILTER, COLOR_FILTER]
-        self.default_filter = PIXELIZE_FILTER
         self.default_color = DEFAULT_COLOR
         self.default_pixel_size = DEFAULT_PIXEL_SIZE
         default_hparam_config_path = os.path.join(SAM2_CONFIGS_DIR, "default_hparams.yaml")
@@ -132,6 +132,7 @@ class App:
                                 nb_pixel_size = gr.Number(label="Pixel Size", interactive=True, minimum=1,
                                                           visible=self.default_filter == PIXELIZE_FILTER,
                                                           value=self.default_pixel_size)
                                 btn_generate_preview = gr.Button("GENERATE PREVIEW")
                     with gr.Row():
@@ -157,7 +158,7 @@ class App:
                                                    nb_pixel_size])
                     preview_params = [vid_frame_prompter, dd_filter_mode, sld_frame_selector, nb_pixel_size,
-                                      cp_color_picker]
                     btn_generate_preview.click(fn=self.sam_inf.add_filter_to_preview,
                                                inputs=preview_params,
                                                outputs=[img_preview])
@@ -180,6 +181,7 @@ class App:
                                                          choices=self.image_modes)
                             dd_models = gr.Dropdown(label="Model", value=DEFAULT_MODEL_TYPE,
                                                     choices=self.sam_inf.available_models)
                             with gr.Accordion("Mask Parameters", open=False, visible=self.default_mode == AUTOMATIC_MODE) as acc_mask_hparams:
                                 mask_hparams_component = self.mask_generation_parameters(_mask_hparams)
@@ -194,10 +196,9 @@ class App:
                             output_file = gr.File(label="Generated psd file", scale=9)
                             btn_open_folder = gr.Button("📁\nOpen PSD folder", scale=1)
-                    sources = [img_input, img_input_prompter, dd_input_modes]
-                    model_params = [dd_models]
                     mask_hparams = mask_hparams_component + [cb_multimask_output]
-                    input_params = sources + model_params + mask_hparams
                     btn_generate.click(fn=self.sam_inf.divide_layer,
                                        inputs=input_params, outputs=[gallery_output, output_file])

         self.image_modes = [AUTOMATIC_MODE, BOX_PROMPT_MODE]
         self.default_mode = BOX_PROMPT_MODE
         self.filter_modes = [PIXELIZE_FILTER, COLOR_FILTER]
+        self.default_filter = COLOR_FILTER
         self.default_color = DEFAULT_COLOR
         self.default_pixel_size = DEFAULT_PIXEL_SIZE
         default_hparam_config_path = os.path.join(SAM2_CONFIGS_DIR, "default_hparams.yaml")
                                 nb_pixel_size = gr.Number(label="Pixel Size", interactive=True, minimum=1,
                                                           visible=self.default_filter == PIXELIZE_FILTER,
                                                           value=self.default_pixel_size)
+                                cb_invert_mask = gr.Checkbox(label="invert mask", value=_mask_hparams["invert_mask"])
                                 btn_generate_preview = gr.Button("GENERATE PREVIEW")
                     with gr.Row():
                                                    nb_pixel_size])
                     preview_params = [vid_frame_prompter, dd_filter_mode, sld_frame_selector, nb_pixel_size,
+                                      cp_color_picker, cb_invert_mask]
                     btn_generate_preview.click(fn=self.sam_inf.add_filter_to_preview,
                                                inputs=preview_params,
                                                outputs=[img_preview])
                                                          choices=self.image_modes)
                             dd_models = gr.Dropdown(label="Model", value=DEFAULT_MODEL_TYPE,
                                                     choices=self.sam_inf.available_models)
+                            cb_invert_mask = gr.Checkbox(label="invert mask", value=_mask_hparams["invert_mask"])
                             with gr.Accordion("Mask Parameters", open=False, visible=self.default_mode == AUTOMATIC_MODE) as acc_mask_hparams:
                                 mask_hparams_component = self.mask_generation_parameters(_mask_hparams)
                             output_file = gr.File(label="Generated psd file", scale=9)
                             btn_open_folder = gr.Button("📁\nOpen PSD folder", scale=1)
+                    input_params = [img_input, img_input_prompter, dd_input_modes, dd_models, cb_invert_mask]
                     mask_hparams = mask_hparams_component + [cb_multimask_output]
+                    input_params += mask_hparams
                     btn_generate.click(fn=self.sam_inf.divide_layer,
                                        inputs=input_params, outputs=[gallery_output, output_file])

configs/default_hparams.yaml CHANGED Viewed

@@ -10,3 +10,4 @@ mask_hparams:
   min_mask_region_area: 25.0
   use_m2m: true
   multimask_output: true

   min_mask_region_area: 25.0
   use_m2m: true
   multimask_output: true
+  invert_mask: false

modules/mask_utils.py CHANGED Viewed

@@ -17,6 +17,12 @@ def decode_to_mask(seg: np.ndarray[np.bool_] | np.ndarray[np.uint8]) -> np.ndarr
         return seg.astype(np.uint8)
 def generate_random_color() -> Tuple[int, int, int]:
     """Generate random color in RGB format"""
     h = np.random.randint(0, 360)
@@ -47,7 +53,6 @@ def create_mask_layers(
         List of RGBA images
     """
     layer_list = []
     sorted_masks = sorted(masks, key=lambda x: x['area'], reverse=True)
     for info in sorted_masks:

         return seg.astype(np.uint8)
def invert_masks(masks: np.ndarray) -> np.ndarray:
    """Invert binary mask(s). Used for background masking.

    Args:
        masks (np.ndarray): Mask array to invert. Boolean arrays and
            0/1-valued numeric arrays are both accepted.

    Returns:
        np.ndarray: The inverted mask. Boolean input stays boolean;
            any other input is returned as ``1 - masks``.
    """
    # `1 - bool_array` silently promotes the result to an integer dtype, so a
    # boolean mask would stop being boolean. Logical negation keeps the dtype.
    if isinstance(masks, np.ndarray) and masks.dtype == np.bool_:
        return np.logical_not(masks)
    # Numeric masks (e.g. uint8 / float 0-1 values) keep the arithmetic form.
    return 1 - masks
 def generate_random_color() -> Tuple[int, int, int]:
     """Generate random color in RGB format"""
     h = np.random.randint(0, 360)
         List of RGBA images
     """
     layer_list = []
     sorted_masks = sorted(masks, key=lambda x: x['area'], reverse=True)
     for info in sorted_masks:

modules/sam_inference.py CHANGED Viewed

@@ -16,6 +16,7 @@ from modules.model_downloader import (
 from modules.paths import (MODELS_DIR, TEMP_OUT_DIR, TEMP_DIR, MODEL_CONFIGS, OUTPUT_DIR)
 from modules.constants import (BOX_PROMPT_MODE, AUTOMATIC_MODE, COLOR_FILTER, PIXELIZE_FILTER, IMAGE_FILE_EXT)
 from modules.mask_utils import (
     save_psd_with_masks,
     create_mask_combined_images,
     create_mask_gallery,
@@ -133,6 +134,7 @@ class SamInference:
     def generate_mask(self,
                       image: np.ndarray,
                       model_type: str,
                       **params) -> List[Dict[str, Any]]:
         """
         Generate masks with Automatic segmentation. Default hyperparameters are in './configs/default_hparams.yaml.'
@@ -140,6 +142,7 @@ class SamInference:
         Args:
             image (np.ndarray): The input image.
             model_type (str): The model type to load.
             **params: The hyperparameters for the mask generator.
         Returns:
@@ -158,6 +161,11 @@ class SamInference:
         except Exception as e:
             logger.exception(f"Error while auto generating masks : {e}")
             raise RuntimeError(f"Failed to generate masks") from e
         return generated_masks
     def predict_image(self,
@@ -166,6 +174,7 @@ class SamInference:
                       box: Optional[np.ndarray] = None,
                       point_coords: Optional[np.ndarray] = None,
                       point_labels: Optional[np.ndarray] = None,
                       **params) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Predict image with prompt data.
@@ -176,6 +185,7 @@ class SamInference:
             box (np.ndarray): The box prompt data.
             point_coords (np.ndarray): The point coordinates prompt data.
             point_labels (np.ndarray): The point labels prompt data.
             **params: The hyperparameters for the mask generator.
         Returns:
@@ -199,6 +209,10 @@ class SamInference:
         except Exception as e:
             logger.exception(f"Error while predicting image with prompt: {str(e)}")
             raise RuntimeError(f"Failed to predict image with prompt") from e
         return masks, scores, logits
     def add_prediction_to_frame(self,
@@ -295,6 +309,7 @@ class SamInference:
                               frame_idx: int,
                               pixel_size: Optional[int] = None,
                               color_hex: Optional[str] = None,
                               ):
         """
         Add filter to the preview image with the prompt data. Specially made for gradio app.
@@ -306,6 +321,7 @@ class SamInference:
             frame_idx (int): The frame index of the video.
             pixel_size (int): The pixel size for the pixelize filter.
             color_hex (str): The color hex code for the solid color filter.
         Returns:
             np.ndarray: The filtered image output.
@@ -336,6 +352,9 @@ class SamInference:
             box=box
         )
         masks = (logits[0] > 0.0).cpu().numpy()
         generated_masks = self.format_to_auto_result(masks)
         if filter_mode == COLOR_FILTER:
@@ -351,7 +370,8 @@ class SamInference:
                               filter_mode: str,
                               frame_idx: int,
                               pixel_size: Optional[int] = None,
-                              color_hex: Optional[str] = None
                               ):
         """
         Create a whole filtered video with video_inference_state. Currently only one frame tracking is supported.
@@ -363,6 +383,7 @@ class SamInference:
             frame_idx (int): The frame index of the video.
             pixel_size (int): The pixel size for the pixelize filter.
             color_hex (str): The color hex code for the solid color filter.
         Returns:
             str: The output video path.
@@ -394,12 +415,14 @@ class SamInference:
             inference_state=self.video_inference_state,
             points=point_coords,
             labels=point_labels,
-            box=box
         )
         video_segments = self.propagate_in_video(inference_state=self.video_inference_state)
         for frame_index, info in video_segments.items():
             orig_image, masks = info["image"], info["mask"]
             masks = self.format_to_auto_result(masks)
             if filter_mode == COLOR_FILTER:
@@ -427,6 +450,7 @@ class SamInference:
                      image_prompt_input_data: Dict,
                      input_mode: str,
                      model_type: str,
                      *params):
         """
         Divide the layer with the given prompt data and save psd file.
@@ -436,6 +460,7 @@ class SamInference:
             image_prompt_input_data (Dict): The image prompt data.
             input_mode (str): The input mode for the image prompt data. ["Automatic", "Box Prompt"]
             model_type (str): The model type to load.
             *params: The hyperparameters for the mask generator.
         Returns:
@@ -467,6 +492,7 @@ class SamInference:
             generated_masks = self.generate_mask(
                 image=image,
                 model_type=model_type,
                 **hparams
             )
@@ -485,7 +511,8 @@ class SamInference:
                 box=box,
                 point_coords=point_coords,
                 point_labels=point_labels,
-                multimask_output=hparams["multimask_output"]
             )
             generated_masks = self.format_to_auto_result(predicted_masks)

 from modules.paths import (MODELS_DIR, TEMP_OUT_DIR, TEMP_DIR, MODEL_CONFIGS, OUTPUT_DIR)
 from modules.constants import (BOX_PROMPT_MODE, AUTOMATIC_MODE, COLOR_FILTER, PIXELIZE_FILTER, IMAGE_FILE_EXT)
 from modules.mask_utils import (
+    invert_masks,
     save_psd_with_masks,
     create_mask_combined_images,
     create_mask_gallery,
     def generate_mask(self,
                       image: np.ndarray,
                       model_type: str,
+                      invert_mask: bool = False,
                       **params) -> List[Dict[str, Any]]:
         """
         Generate masks with Automatic segmentation. Default hyperparameters are in './configs/default_hparams.yaml.'
         Args:
             image (np.ndarray): The input image.
             model_type (str): The model type to load.
+            invert_mask (bool): Invert the mask output - used for background masking.
             **params: The hyperparameters for the mask generator.
         Returns:
         except Exception as e:
             logger.exception(f"Error while auto generating masks : {e}")
             raise RuntimeError(f"Failed to generate masks") from e
+        if invert_mask:
+            generated_masks = [{'segmentation': invert_masks(mask['segmentation']),
+                                'area': mask['area']} for mask in generated_masks]
         return generated_masks
     def predict_image(self,
                       box: Optional[np.ndarray] = None,
                       point_coords: Optional[np.ndarray] = None,
                       point_labels: Optional[np.ndarray] = None,
+                      invert_mask: bool = False,
                       **params) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Predict image with prompt data.
             box (np.ndarray): The box prompt data.
             point_coords (np.ndarray): The point coordinates prompt data.
             point_labels (np.ndarray): The point labels prompt data.
+            invert_mask (bool): Invert the mask output - used for background masking.
             **params: The hyperparameters for the mask generator.
         Returns:
         except Exception as e:
             logger.exception(f"Error while predicting image with prompt: {str(e)}")
             raise RuntimeError(f"Failed to predict image with prompt") from e
+        if invert_mask:
+            masks = invert_masks(masks)
         return masks, scores, logits
     def add_prediction_to_frame(self,
                               frame_idx: int,
                               pixel_size: Optional[int] = None,
                               color_hex: Optional[str] = None,
+                              invert_mask: bool = False
                               ):
         """
         Add filter to the preview image with the prompt data. Specially made for gradio app.
             frame_idx (int): The frame index of the video.
             pixel_size (int): The pixel size for the pixelize filter.
             color_hex (str): The color hex code for the solid color filter.
+            invert_mask (bool): Invert the mask output - used for background masking.
         Returns:
             np.ndarray: The filtered image output.
             box=box
         )
         masks = (logits[0] > 0.0).cpu().numpy()
+        if invert_mask:
+            masks = invert_masks(masks)
         generated_masks = self.format_to_auto_result(masks)
         if filter_mode == COLOR_FILTER:
                               filter_mode: str,
                               frame_idx: int,
                               pixel_size: Optional[int] = None,
+                              color_hex: Optional[str] = None,
+                              invert_mask: bool = False
                               ):
         """
         Create a whole filtered video with video_inference_state. Currently only one frame tracking is supported.
             frame_idx (int): The frame index of the video.
             pixel_size (int): The pixel size for the pixelize filter.
             color_hex (str): The color hex code for the solid color filter.
+            invert_mask (bool): Invert the mask output - used for background masking.
         Returns:
             str: The output video path.
             inference_state=self.video_inference_state,
             points=point_coords,
             labels=point_labels,
+            box=box,
         )
         video_segments = self.propagate_in_video(inference_state=self.video_inference_state)
         for frame_index, info in video_segments.items():
             orig_image, masks = info["image"], info["mask"]
+            if invert_mask:
+                masks = invert_masks(masks)
             masks = self.format_to_auto_result(masks)
             if filter_mode == COLOR_FILTER:
                      image_prompt_input_data: Dict,
                      input_mode: str,
                      model_type: str,
+                     invert_mask: bool = False,
                      *params):
         """
         Divide the layer with the given prompt data and save psd file.
             image_prompt_input_data (Dict): The image prompt data.
             input_mode (str): The input mode for the image prompt data. ["Automatic", "Box Prompt"]
             model_type (str): The model type to load.
+            invert_mask (bool): Invert the mask output.
             *params: The hyperparameters for the mask generator.
         Returns:
             generated_masks = self.generate_mask(
                 image=image,
                 model_type=model_type,
+                invert_mask=invert_mask,
                 **hparams
             )
                 box=box,
                 point_coords=point_coords,
                 point_labels=point_labels,
+                multimask_output=hparams["multimask_output"],
+                invert_mask=invert_mask
             )
             generated_masks = self.format_to_auto_result(predicted_masks)