file added

- app.py +35 -4
- modules/masking_module.py +168 -0
- requirements.txt +10 -0
app.py
CHANGED
@@ -1,7 +1,38 @@
+#Script added by SPDraptor
+
+from typing import Optional
+import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+import torch
+print("cuda present = ", torch.cuda.is_available())
+import os
+import sys
 import gradio as gr
-
-
-demo
-demo.launch()
+from PIL import Image
+from modules import masking_module
+
+
+
+DESCRIPTION = "Welcome to Raptor APIs"
+
+css = """
+#output {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
+}
+"""
 
+with gr.Blocks(css=css) as demo:
+    gr.Markdown(DESCRIPTION)
+    with gr.Tab(label="OBJ_mask"):
+        with gr.Row():
+            with gr.Column():
+                image = gr.Image(label="Input main Picture")
+                image_object = gr.Textbox(label="object name")
+                mask_btn = gr.Button(value="createMask")
+            with gr.Column():
+                output_mask = gr.Image(label="mask")
+        mask_btn.click(masking_module.masking_process, inputs=[image, image_object], outputs=output_mask, api_name="masking_step")
 
+demo.launch(debug=True)
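Because the click handler is registered with api_name="masking_step", the Space exposes the masking step as a named endpoint. A minimal client-side sketch using a recent gradio_client; the Space ID "SPDraptor/raptor-apis" and the image path are illustrative placeholders, not taken from the commit:

    from gradio_client import Client, handle_file

    client = Client("SPDraptor/raptor-apis")   # placeholder Space ID
    result = client.predict(
        handle_file("tiger.jpeg"),             # maps to the gr.Image input
        "Tiger",                               # maps to the "object name" textbox
        api_name="/masking_step",
    )
    print(result)                              # filepath of the returned mask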
modules/masking_module.py
ADDED
@@ -0,0 +1,168 @@
#Script added by SPDraptor

import copy
import numpy as np
import spaces
import torch
from PIL import Image, ImageDraw
from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from typing import Any
import supervision as sv
from sam2.build_sam import build_sam2, build_sam2_video_predictor
from sam2.sam2_image_predictor import SAM2ImagePredictor

device = torch.device('cuda')

model_id = 'microsoft/Florence-2-large'

models_dict = {
    'Florence_model': AutoModelForCausalLM.from_pretrained(model_id,
                                                           trust_remote_code=True,
                                                           attn_implementation="flash_attention_2",
                                                           device_map=device).eval(),
    'Florence_processor': AutoProcessor.from_pretrained(model_id, trust_remote_code=True),
}
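app.py shells out to pip for flash-attn before this module is imported, and the loader above hard-codes attn_implementation="flash_attention_2". If that install can fail (CPU hardware, wheel mismatch), a guarded variant like the following sketch keeps the model loadable; the import probe and the "eager" fallback are assumptions, not part of this commit:

    import torch
    from transformers import AutoModelForCausalLM

    try:
        import flash_attn  # noqa: F401 -- probe only, not part of the commit
        attn_impl = "flash_attention_2"
    except ImportError:
        attn_impl = "eager"  # portable attention backend in transformers

    florence = AutoModelForCausalLM.from_pretrained(
        'microsoft/Florence-2-large',
        trust_remote_code=True,
        attn_implementation=attn_impl,
        device_map='cuda' if torch.cuda.is_available() else 'cpu',
    ).eval()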
SAM_CHECKPOINT = "/home/user/app/sam2_hiera_large.pt"
SAM_CONFIG = "sam2_hiera_l.yaml"

def load_sam_image_model(
    device: torch.device,
    config: str = SAM_CONFIG,
    checkpoint: str = SAM_CHECKPOINT
) -> SAM2ImagePredictor:
    model = build_sam2(config, checkpoint, device=device)
    return SAM2ImagePredictor(sam_model=model)

SAM_IMAGE_MODEL = load_sam_image_model(device=device)

def run_sam_inference(
    model: Any,
    image: Image.Image,
    detections: sv.Detections
) -> dict:
    image = np.array(image.convert("RGB"))
    model.set_image(image)
    print(type(detections.xyxy), detections.xyxy)
    if detections.xyxy.size == 0:
        return {
            'code': 400,
            'data': 'null',
            'message': 'The AI couldn’t detect the object you want to mask.'
        }

    mask, score, _ = model.predict(box=detections.xyxy, multimask_output=False)

    # dirty fix; remove this later
    if len(mask.shape) == 4:
        mask = np.squeeze(mask)

    detections.mask = mask.astype(bool)
    return {
        'code': 200,
        'data': detections,
        'message': 'Mask created successfully.'
    }

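A note on the "dirty fix" above: SAM2ImagePredictor.predict with multimask_output=False returns masks shaped (N, 1, H, W) for a batch of N boxes, and np.squeeze with no axis argument can collapse a single-box batch (1, 1, H, W) all the way down to (H, W), which would break the detections2.mask[0] indexing in masking_process below. A shape-safe sketch; normalize_masks is a hypothetical helper, not in the commit:

    import numpy as np

    def normalize_masks(mask: np.ndarray) -> np.ndarray:
        """Coerce SAM2 mask output to (N, H, W) whatever the batch size."""
        if mask.ndim == 4:
            mask = np.squeeze(mask, axis=1)  # (N, 1, H, W) -> (N, H, W)
        elif mask.ndim == 2:
            mask = mask[None, ...]           # (H, W) -> (1, H, W)
        return mask.astype(bool)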
def florence2(image, task_prompt, text_input=None):
    """
    Calling the Microsoft Florence-2 model
    """
    model = models_dict['Florence_model']
    processor = models_dict['Florence_processor']
    # print(image)
    if text_input is None:
        prompt = task_prompt
    else:
        prompt = task_prompt + text_input

    input_florence = processor(text=prompt, images=image, return_tensors="pt").to(torch.float16).to("cuda")
    print(input_florence)
    generated_ids = model.generate(
        input_ids=input_florence["input_ids"],
        pixel_values=input_florence["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    generated_text = processor.batch_decode(generated_ids,
                                            skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height))

    return parsed_answer

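For orientation, the parsed answer for the '<OPEN_VOCABULARY_DETECTION>' task follows the shape below, which is what sv.Detections.from_lmm consumes further down; the key names follow the Florence-2 sample code, and the coordinate values are invented for illustration:

    # Illustrative output of florence2(image, '<OPEN_VOCABULARY_DETECTION>', text_input="Tiger")
    parsed_answer = {
        '<OPEN_VOCABULARY_DETECTION>': {
            'bboxes': [[178.6, 181.4, 684.2, 752.5]],  # xyxy pixel coordinates
            'bboxes_labels': ['Tiger'],
            'polygons': [],
            'polygons_labels': [],
        }
    }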
def draw_MASK(image, prediction, fill_mask=False):
    """
    Draws segmentation masks with polygons on an image.

    Parameters:
    - image: PIL Image to draw the masks on.
    - prediction: Dictionary containing 'polygons' and 'labels' keys.
        'polygons' is a list of lists, each containing vertices of a polygon.
        'labels' is a list of labels corresponding to each polygon.
    - fill_mask: Boolean indicating whether to fill the polygons with color.
    """
    width = image.width
    height = image.height
    new_image = Image.new("RGB", (width, height), color="black")
    draw = ImageDraw.Draw(new_image)
    scale = 1

    for polygons, label in zip(prediction['polygons'], prediction['labels']):
        color = "white"
        fill_color = "white" if fill_mask else None

        for _polygon in polygons:
            _polygon = np.array(_polygon).reshape(-1, 2)
            if len(_polygon) < 3:
                print('Invalid polygon:', _polygon)
                continue

            _polygon = (_polygon * scale).reshape(-1).tolist()
            if fill_mask:
                draw.polygon(_polygon, outline=color, fill=fill_color)
            else:
                draw.polygon(_polygon, outline=color)
            draw.text((_polygon[0] + 8, _polygon[1] + 2), label, fill=color)

    return new_image

# @spaces.GPU
def masking_process(image, obj):
    # task_prompt = '<REGION_TO_SEGMENTATION>'
    # task_prompt = '<OPEN_VOCABULARY_DETECTION>'
    # print(type(task_prompt), type(obj))
    image = Image.open(image.file).convert("RGB")

    # results = florence2(image, task_prompt, text_input=obj)
    # output_image = copy.deepcopy(image)
    # img = draw_MASK(output_image,
    #                 results['<REGION_TO_SEGMENTATION>'],
    #                 fill_mask=True)
    # mask = img.convert('1')
    task_prompt = '<OPEN_VOCABULARY_DETECTION>'

    # image = Image.open("/content/tiger.jpeg").convert("RGB")

    # obj = "Tiger"

    Florence_results = florence2(image, task_prompt, text_input=obj)

    detections = sv.Detections.from_lmm(
        lmm=sv.LMM.FLORENCE_2,
        result=Florence_results,
        resolution_wh=image.size
    )
    response = run_sam_inference(SAM_IMAGE_MODEL, image, detections)
    if response['code'] == 400:
        return response
    else:
        detections2 = response['data']
        mask = Image.fromarray(detections2.mask[0])
        response['data'] = mask
        torch.cuda.empty_cache()
        return response
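A hypothetical local smoke test for the module: it needs a CUDA GPU and the SAM2 checkpoint at SAM_CHECKPOINT; SimpleNamespace stands in for the file-like wrapper masking_process expects, and tiger.jpeg echoes the placeholder already present in the commented-out code:

    from types import SimpleNamespace
    from modules import masking_module  # loads Florence-2 and SAM2 at import time

    request = SimpleNamespace(file="tiger.jpeg")  # placeholder image path
    response = masking_module.masking_process(request, "Tiger")
    if response['code'] == 200:
        response['data'].save("tiger_mask.png")  # 'data' holds the PIL mask
    else:
        print(response['message'])

Because both models load at import time, even this trivial check pulls the full checkpoints; moving the loading behind a function would make the module unit-testable with the pytest dependency that requirements.txt already lists.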
requirements.txt
ADDED
@@ -0,0 +1,10 @@
tqdm
einops
spaces
timm
transformers
samv2
gradio
supervision
opencv-python
pytest