Commit 42545c2
Parent(s): b2f9f4f
adding text box
app.py CHANGED
@@ -13,6 +13,15 @@ processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
 model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")
 classes = list()
 
+def create_mask(image,image_mask,alpha=0.7):
+    mask = np.zeros_like(image)
+    # copy your image_mask to all dimensions (i.e. colors) of your image
+    for i in range(3):
+        mask[:,:,i] = image_mask.copy()
+    # apply the mask to your image
+    overlay_image = cv2.addWeighted(mask,alpha,image,1-alpha,0)
+    return overlay_image
+
 def rescale_bbox(bbox,orig_image_shape=(1024,1024),model_shape=352):
     bbox = np.asarray(bbox)/model_shape
     y1,y2 = bbox[::2] *orig_image_shape[0]
@@ -30,7 +39,10 @@ def detect_using_clip(image,prompts=[],threshould=0.4):
     with torch.no_grad(): # Use 'torch.no_grad()' to disable gradient computation
         outputs = model(**inputs)
     preds = outputs.logits.unsqueeze(1)
+
+    # tensor_images = [torch.sigmoid(preds[i][0]) for i in range(len(prompts))]
     detection = outputs.logits[0] # Assuming class index 0
+
     for i,prompt in enumerate(prompts):
         predicted_image = torch.sigmoid(preds[i][0]).detach().cpu().numpy()
         predicted_image = np.where(predicted_image>threshould,255,0)
@@ -39,7 +51,7 @@ def detect_using_clip(image,prompts=[],threshould=0.4):
         props = regionprops(lbl_0)
         model_detections[prompt] = [rescale_bbox(prop.bbox,orig_image_shape=image.shape[:2],model_shape=predicted_image.shape[0]) for prop in props]
 
-    return model_detections
+    return model_detections
 
 def visualize_images(image,detections,prompt):
     H,W = image.shape[:2]
@@ -49,26 +61,20 @@ def visualize_images(image,detections,prompt):
         return image_copy
     for bbox in detections[prompt]:
         cv2.rectangle(image_copy, (int(bbox[1]), int(bbox[0])), (int(bbox[3]), int(bbox[2])), (255, 0, 0), 2)
+        cv2.putText(image_copy,(int(bbox[1]), int(bbox[0])),cv2.FONT_HERSHEY_SIMPLEX, 2, 255)
     return image_copy
 
 
 def shot(image, labels_text,selected_categoty):
-    print("Labels Text ",labels_text)
     prompts = labels_text.split(',')
-
-
-
-    print("Image shape ",image.shape )
-
-    model_detections = detect_using_clip(image,prompts=prompts)
-    print("detections :",model_detections)
-    print("Ctegory ",selected_categoty)
-    return visualize_images(image=image,detections=model_detections,prompt=selected_categoty)
+    model_detections = detect_using_clip(image,prompts=prompts)
+    category_image = visualize_images(image=image,detections=model_detections,prompt=selected_categoty)
+    return category_image
 
 iface = gr.Interface(fn=shot,
                     inputs = ["image","text","text"],
                     outputs="image",
-                    description="Add
+                    description="Add an Image and list of category to be detected separated by commas",
                     title="Zero-shot Image Classification with Prompt ",
                     examples=[["images/room.jpg","bed,table,plant",'plant']],
                     # allow_flagging=False,
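For context, the inputs dict consumed inside detect_using_clip is built above the first hunk and does not appear in this diff. The standard CLIPSeg invocation from the model card, which the visible code is consistent with, looks like the sketch below; the prompt list and image path are illustrative, taken from the examples entry.

from PIL import Image
import torch
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation

processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")

image = Image.open("images/room.jpg")            # illustrative path from the examples list
prompts = ["bed", "table", "plant"]
# one copy of the image per text prompt, as in the CLIPSeg model card
inputs = processor(text=prompts, images=[image] * len(prompts),
                   padding="max_length", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
print(outputs.logits.shape)                      # torch.Size([3, 352, 352])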
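The diff shows only the first two lines of rescale_bbox. Given that skimage.measure.regionprops returns bbox as (min_row, min_col, max_row, max_col), and that visualize_images reads the result as [y1, x1, y2, x2], the rest of the function presumably mirrors the row logic for columns. A sketch under those assumptions; the column lines and the return order fall outside the hunk and are assumed:

import numpy as np

def rescale_bbox(bbox, orig_image_shape=(1024, 1024), model_shape=352):
    # normalize the (min_row, min_col, max_row, max_col) box to [0, 1]
    bbox = np.asarray(bbox) / model_shape
    y1, y2 = bbox[::2] * orig_image_shape[0]     # rows -> pixel y coordinates
    x1, x2 = bbox[1::2] * orig_image_shape[1]    # cols -> pixel x coordinates (assumed)
    return [int(y1), int(x1), int(y2), int(x2)]  # order visualize_images expects (assumed)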
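One caveat with the added label call in visualize_images: cv2.putText requires a text argument between the image and the origin point (its signature is putText(img, text, org, fontFace, fontScale, color[, thickness])), so the line as committed raises a TypeError at runtime. A corrected call, assuming the intent is to tag each box with the prompt it matched:

import cv2

# inside the bbox loop of visualize_images, replacing the committed putText line
cv2.putText(image_copy, prompt, (int(bbox[1]), int(bbox[0]) - 5),
            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)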
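Finally, the new create_mask helper is not called in any hunk shown here. A self-contained sketch of how it could overlay a thresholded CLIPSeg mask on the input image; the mask contents and file paths are illustrative assumptions, not part of the commit:

import cv2
import numpy as np

def create_mask(image, image_mask, alpha=0.7):
    mask = np.zeros_like(image)
    # broadcast the single-channel mask across all three color channels
    for i in range(3):
        mask[:, :, i] = image_mask.copy()
    # alpha-blend the mask onto the image
    return cv2.addWeighted(mask, alpha, image, 1 - alpha, 0)

image = cv2.imread("images/room.jpg")                      # uint8, H x W x 3
pred = np.zeros((352, 352), dtype=np.uint8)                # stand-in for a CLIPSeg mask
pred[100:250, 80:300] = 255                                # fake "detected" region
mask = cv2.resize(pred, (image.shape[1], image.shape[0]))  # cv2.resize takes (W, H)
cv2.imwrite("overlay.jpg", create_mask(image, mask))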