Spaces:
Running
on
Zero
Running
on
Zero
update to support gradio 4+
Browse files
- app.py +24 -7
- requirements.txt +1 -1
- utils/load_model.py +9 -2
- utils/predict.py +10 -3
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
|
| 2 |
import io
|
| 3 |
import os
|
| 4 |
debug = False
|
|
@@ -29,7 +29,7 @@ PREPROCESS = lambda x: OWLVIT_PRECESSOR(images=x, return_tensors='pt')
|
|
| 29 |
IMAGES_FOLDER = "data/images"
|
| 30 |
# XCLIP_RESULTS = json.load(open("data/jsons/xclip_org.json", "r"))
|
| 31 |
IMAGE2GT = json.load(open("data/jsons/image2gt.json", 'r'))
|
| 32 |
-
CUB_DESC_EMBEDS = torch.load('data/text_embeddings/cub_200_desc.pt')
|
| 33 |
CUB_IDX2NAME = json.load(open('data/jsons/cub_desc_idx2name.json', 'r'))
|
| 34 |
CUB_IDX2NAME = {int(k): v for k, v in CUB_IDX2NAME.items()}
|
| 35 |
# correct_predictions = [k for k, v in XCLIP_RESULTS.items() if v['prediction']]
|
|
@@ -269,12 +269,20 @@ def update_selected_image(event: gr.SelectData):
|
|
| 269 |
descs = {k: descs[k] for k in ORDERED_PARTS}
|
| 270 |
custom_text = [custom_class_name] + list(descs.values())
|
| 271 |
descriptions = ";\n".join(custom_text)
|
| 272 |
-
textbox = gr.Textbox.update(value=descriptions, lines=12, visible=True, label="XCLIP descriptions", interactive=True, info='Please use ";" to separate the descriptions for each part, and keep the format of {part name}: {descriptions}', show_label=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
# modified_exp = gr.HTML().update(value="", visible=True)
|
| 274 |
return gt_label, img_base64, xclip_pred_markdown, xclip_exp, current_image, textbox
|
| 275 |
|
| 276 |
def on_edit_button_click_xclip():
|
| 277 |
-
empty_exp = gr.HTML.update(visible=False)
|
|
|
|
| 278 |
|
| 279 |
# Populate the textbox with current descriptions
|
| 280 |
descs = XCLIP_DESC[current_predicted_class.state]
|
|
@@ -282,7 +290,14 @@ def on_edit_button_click_xclip():
|
|
| 282 |
descs = {k: descs[k] for k in ORDERED_PARTS}
|
| 283 |
custom_text = ["class name: custom"] + list(descs.values())
|
| 284 |
descriptions = ";\n".join(custom_text)
|
| 285 |
-
textbox = gr.Textbox.update(value=descriptions, lines=12, visible=True, label="XCLIP descriptions", interactive=True, info='Please use ";" to separate the descriptions for each part, and keep the format of {part name}: {descriptions}', show_label=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
return textbox, empty_exp
|
| 288 |
|
|
@@ -350,10 +365,12 @@ def on_predict_button_click_xclip(textbox_input: str):
|
|
| 350 |
custom_pred_markdown = f"""
|
| 351 |
### <span style='color:{custom_color}'> {custom_label} {custom_pred_score:.4f}</span>
|
| 352 |
"""
|
| 353 |
-
textbox = gr.Textbox.update(visible=False)
|
|
|
|
| 354 |
# return textbox, xclip_pred_markdown, xclip_explanation, custom_pred_markdown, modified_explanation
|
| 355 |
|
| 356 |
-
modified_exp = gr.HTML().update(value=modified_explanation, visible=True)
|
|
|
|
| 357 |
return textbox, xclip_pred_markdown, xclip_explanation, custom_pred_markdown, modified_exp
|
| 358 |
|
| 359 |
|
|
|
|
| 1 |
+
|
| 2 |
import io
|
| 3 |
import os
|
| 4 |
debug = False
|
|
|
|
| 29 |
IMAGES_FOLDER = "data/images"
|
| 30 |
# XCLIP_RESULTS = json.load(open("data/jsons/xclip_org.json", "r"))
|
| 31 |
IMAGE2GT = json.load(open("data/jsons/image2gt.json", 'r'))
|
| 32 |
+
CUB_DESC_EMBEDS = torch.load('data/text_embeddings/cub_200_desc.pt').to(DEVICE)
|
| 33 |
CUB_IDX2NAME = json.load(open('data/jsons/cub_desc_idx2name.json', 'r'))
|
| 34 |
CUB_IDX2NAME = {int(k): v for k, v in CUB_IDX2NAME.items()}
|
| 35 |
# correct_predictions = [k for k, v in XCLIP_RESULTS.items() if v['prediction']]
|
|
|
|
| 269 |
descs = {k: descs[k] for k in ORDERED_PARTS}
|
| 270 |
custom_text = [custom_class_name] + list(descs.values())
|
| 271 |
descriptions = ";\n".join(custom_text)
|
| 272 |
+
# textbox = gr.Textbox.update(value=descriptions, lines=12, visible=True, label="XCLIP descriptions", interactive=True, info='Please use ";" to separate the descriptions for each part, and keep the format of {part name}: {descriptions}', show_label=False)
|
| 273 |
+
textbox = gr.Textbox(value=descriptions,
|
| 274 |
+
lines=12,
|
| 275 |
+
visible=True,
|
| 276 |
+
label="XCLIP descriptions",
|
| 277 |
+
interactive=True,
|
| 278 |
+
info='Please use ";" to separate the descriptions for each part, and keep the format of {part name}: {descriptions}',
|
| 279 |
+
show_label=False)
|
| 280 |
# modified_exp = gr.HTML().update(value="", visible=True)
|
| 281 |
return gt_label, img_base64, xclip_pred_markdown, xclip_exp, current_image, textbox
|
| 282 |
|
| 283 |
def on_edit_button_click_xclip():
|
| 284 |
+
# empty_exp = gr.HTML.update(visible=False)
|
| 285 |
+
empty_exp = gr.HTML(visible=False)
|
| 286 |
|
| 287 |
# Populate the textbox with current descriptions
|
| 288 |
descs = XCLIP_DESC[current_predicted_class.state]
|
|
|
|
| 290 |
descs = {k: descs[k] for k in ORDERED_PARTS}
|
| 291 |
custom_text = ["class name: custom"] + list(descs.values())
|
| 292 |
descriptions = ";\n".join(custom_text)
|
| 293 |
+
# textbox = gr.Textbox.update(value=descriptions, lines=12, visible=True, label="XCLIP descriptions", interactive=True, info='Please use ";" to separate the descriptions for each part, and keep the format of {part name}: {descriptions}', show_label=False)
|
| 294 |
+
textbox = gr.Textbox(value=descriptions,
|
| 295 |
+
lines=12,
|
| 296 |
+
visible=True,
|
| 297 |
+
label="XCLIP descriptions",
|
| 298 |
+
interactive=True,
|
| 299 |
+
info='Please use ";" to separate the descriptions for each part, and keep the format of {part name}: {descriptions}',
|
| 300 |
+
show_label=False)
|
| 301 |
|
| 302 |
return textbox, empty_exp
|
| 303 |
|
|
|
|
| 365 |
custom_pred_markdown = f"""
|
| 366 |
### <span style='color:{custom_color}'> {custom_label} {custom_pred_score:.4f}</span>
|
| 367 |
"""
|
| 368 |
+
# textbox = gr.Textbox.update(visible=False)
|
| 369 |
+
textbox = gr.Textbox(visible=False)
|
| 370 |
# return textbox, xclip_pred_markdown, xclip_explanation, custom_pred_markdown, modified_explanation
|
| 371 |
|
| 372 |
+
# modified_exp = gr.HTML().update(value=modified_explanation, visible=True)
|
| 373 |
+
modified_exp = gr.HTML(value=modified_explanation, visible=True)
|
| 374 |
return textbox, xclip_pred_markdown, xclip_explanation, custom_pred_markdown, modified_exp
|
| 375 |
|
| 376 |
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
torch
|
| 2 |
torchvision
|
| 3 |
-
gradio
|
| 4 |
numpy
|
| 5 |
Pillow
|
| 6 |
transformers
|
|
|
|
| 1 |
torch
|
| 2 |
torchvision
|
| 3 |
+
gradio
|
| 4 |
numpy
|
| 5 |
Pillow
|
| 6 |
transformers
|
utils/load_model.py
CHANGED
|
@@ -1,12 +1,19 @@
|
|
| 1 |
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import torch
|
| 5 |
from transformers import OwlViTProcessor, OwlViTForObjectDetection
|
| 6 |
|
| 7 |
from .model import OwlViTForClassification
|
| 8 |
|
| 9 |
-
@
|
| 10 |
def load_xclip(device: str = "cuda:0",
|
| 11 |
n_classes: int = 183,
|
| 12 |
use_teacher_logits: bool = False,
|
|
|
|
| 1 |
|
| 2 |
|
| 3 |
+
try:
|
| 4 |
+
import spaces
|
| 5 |
+
gpu_decorator = spaces.GPU
|
| 6 |
+
except ImportError:
|
| 7 |
+
# Define a no-operation decorator as fallback
|
| 8 |
+
def gpu_decorator(func):
|
| 9 |
+
return func
|
| 10 |
+
|
| 11 |
import torch
|
| 12 |
from transformers import OwlViTProcessor, OwlViTForObjectDetection
|
| 13 |
|
| 14 |
from .model import OwlViTForClassification
|
| 15 |
|
| 16 |
+
@gpu_decorator
|
| 17 |
def load_xclip(device: str = "cuda:0",
|
| 18 |
n_classes: int = 183,
|
| 19 |
use_teacher_logits: bool = False,
|
utils/predict.py
CHANGED
|
@@ -1,4 +1,11 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import PIL
|
| 3 |
import torch
|
| 4 |
|
|
@@ -30,7 +37,7 @@ def encode_descs_xclip(owlvit_det_processor: callable, model: callable, descs: l
|
|
| 30 |
# text_embeds = torch.cat(text_embeds, dim=0)
|
| 31 |
# text_embeds = torch.nn.functional.normalize(text_embeds, dim=-1)
|
| 32 |
# return text_embeds.to(device)
|
| 33 |
-
@
|
| 34 |
def xclip_pred(new_desc: dict,
|
| 35 |
new_part_mask: dict,
|
| 36 |
new_class: str,
|
|
@@ -76,7 +83,7 @@ def xclip_pred(new_desc: dict,
|
|
| 76 |
n_classes = 201
|
| 77 |
query_tokens = owlvit_processor(text=list(new_desc_.values()), padding="max_length", truncation=True, return_tensors="pt").to(device)
|
| 78 |
new_class_embed = model.owlvit.get_text_features(**query_tokens)
|
| 79 |
-
query_embeds = torch.cat([cub_embeds, new_class_embed], dim=0)
|
| 80 |
modified_class_idx = 200
|
| 81 |
else:
|
| 82 |
n_classes = 200
|
|
|
|
| 1 |
+
try:
|
| 2 |
+
import spaces
|
| 3 |
+
gpu_decorator = spaces.GPU
|
| 4 |
+
except ImportError:
|
| 5 |
+
# Define a no-operation decorator as fallback
|
| 6 |
+
def gpu_decorator(func):
|
| 7 |
+
return func
|
| 8 |
+
|
| 9 |
import PIL
|
| 10 |
import torch
|
| 11 |
|
|
|
|
| 37 |
# text_embeds = torch.cat(text_embeds, dim=0)
|
| 38 |
# text_embeds = torch.nn.functional.normalize(text_embeds, dim=-1)
|
| 39 |
# return text_embeds.to(device)
|
| 40 |
+
@gpu_decorator
|
| 41 |
def xclip_pred(new_desc: dict,
|
| 42 |
new_part_mask: dict,
|
| 43 |
new_class: str,
|
|
|
|
| 83 |
n_classes = 201
|
| 84 |
query_tokens = owlvit_processor(text=list(new_desc_.values()), padding="max_length", truncation=True, return_tensors="pt").to(device)
|
| 85 |
new_class_embed = model.owlvit.get_text_features(**query_tokens)
|
| 86 |
+
query_embeds = torch.cat([cub_embeds, new_class_embed], dim=0).to(device)
|
| 87 |
modified_class_idx = 200
|
| 88 |
else:
|
| 89 |
n_classes = 200
|