update data and model

- app.py +8 -3
- backbone.py +8 -9
app.py
CHANGED
@@ -533,6 +533,9 @@ def make_dataset_images_section(open=False):
         labels = np.array(dataset['label'])
         unique_labels = np.unique(labels)
         valid_classes = [i for i in classes if i in unique_labels]
+        invalid_classes = [i for i in classes if i not in unique_labels]
+        if len(invalid_classes) > 0:
+            gr.Warning(f"Classes {invalid_classes} not found in the dataset.")
         if len(valid_classes) == 0:
             gr.Error(f"Classes {classes} not found in the dataset.")
             return None
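For readers unfamiliar with the two alert APIs used here: gr.Warning(...) is a plain function call that pops a toast and lets the handler keep running, while gr.Error is an exception class that only takes effect when raised. A minimal sketch of the validation pattern, with an illustrative filter_classes helper (not a function from the app):

import gradio as gr
import numpy as np

def filter_classes(classes, labels):
    # Keep only the requested classes that actually occur in the dataset.
    unique_labels = np.unique(np.asarray(labels))
    valid_classes = [i for i in classes if i in unique_labels]
    invalid_classes = [i for i in classes if i not in unique_labels]
    if len(invalid_classes) > 0:
        # Non-fatal: show a toast and continue with the valid subset.
        gr.Warning(f"Classes {invalid_classes} not found in the dataset.")
    if len(valid_classes) == 0:
        # Fatal: raising gr.Error aborts the event and shows the message in the UI.
        raise gr.Error(f"Classes {classes} not found in the dataset.")
    return valid_classes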
@@ -580,11 +583,13 @@ def make_parameters_section():
         layer_dict = LAYER_DICT
         if model_name in layer_dict:
             value = layer_dict[model_name]
-            return gr.Slider(1, value, step=1, label="Backbone: Layer index", value=value, elem_id="layer", visible=True)
+            return (gr.Slider(1, value, step=1, label="Backbone: Layer index", value=value, elem_id="layer", visible=True),
+                    gr.Dropdown(["attn: attention output", "mlp: mlp output", "block: sum of residual"], label="Backbone: Layer type", value="block: sum of residual", elem_id="node_type", info="which feature to take from each layer?"))
         else:
             value = 12
-            return gr.Slider(1, value, step=1, label="Backbone: Layer index", value=value, elem_id="layer", visible=True)
-
+            return (gr.Dropdown(["attn: attention output", "mlp: mlp output", "block: sum of residual"], label="Backbone: Layer type", value="block: sum of residual", elem_id="node_type", info="which feature to take from each layer?"),
+                    gr.Slider(1, value, step=1, label="Backbone: Layer index", value=value, elem_id="layer", visible=True))
+    model_dropdown.change(fn=change_layer_slider, inputs=model_dropdown, outputs=[layer_slider, node_type_dropdown])
 
     with gr.Accordion("➡️ Click to expand: more parameters", open=False):
         affinity_focal_gamma_slider = gr.Slider(0.01, 1, step=0.01, label="NCUT: Affinity focal gamma", value=0.5, elem_id="affinity_focal_gamma", info="decrease for sharper segmentation")
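Gradio matches the tuple returned by an event handler to its outputs list by position, so both branches of change_layer_slider should hand back the slider and the dropdown in the order given by outputs=[layer_slider, node_type_dropdown]. A self-contained sketch of the wiring, with the component setup reduced to illustrative placeholders:

import gradio as gr

LAYER_DICT = {"CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)": 12}  # subset of backbone.py's table
NODE_TYPES = ["attn: attention output", "mlp: mlp output", "block: sum of residual"]

def change_layer_slider(model_name):
    # Return order must match outputs=[layer_slider, node_type_dropdown] below.
    value = LAYER_DICT.get(model_name, 12)
    return (gr.Slider(1, value, step=1, label="Backbone: Layer index",
                      value=value, elem_id="layer", visible=True),
            gr.Dropdown(NODE_TYPES, label="Backbone: Layer type",
                        value="block: sum of residual", elem_id="node_type",
                        info="which feature to take from each layer?"))

with gr.Blocks() as demo:
    model_dropdown = gr.Dropdown(list(LAYER_DICT), label="Backbone: Model")
    layer_slider = gr.Slider(1, 12, step=1, label="Backbone: Layer index", value=12)
    node_type_dropdown = gr.Dropdown(NODE_TYPES, label="Backbone: Layer type",
                                     value="block: sum of residual")
    model_dropdown.change(fn=change_layer_slider, inputs=model_dropdown,
                          outputs=[layer_slider, node_type_dropdown])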
backbone.py
CHANGED
@@ -1,17 +1,14 @@
+# Author: Huzheng Yang
+# %%
 from typing import Optional, Tuple
 from einops import rearrange
 import requests
 import torch
 import torch.nn.functional as F
 import timm
-from PIL import Image
 from torch import nn
 import numpy as np
 import os
-import time
-
-import gradio as gr
-
 from functools import partial
 
 MODEL_DICT = {}
@@ -613,7 +610,7 @@ class EVA02(nn.Module):
         super().__init__(**kwargs)
 
         model = timm.create_model(
-            '
+            'eva02_base_patch14_448.mim_in22k_ft_in1k',
             pretrained=True,
             num_classes=0,  # remove classifier nn.Linear
         )
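Outside the Space, the same checkpoint can be pulled straight from timm; a sketch assuming a recent timm release (the data-config helpers used below appeared around timm 0.9):

import timm
import torch

model = timm.create_model(
    'eva02_base_patch14_448.mim_in22k_ft_in1k',
    pretrained=True,
    num_classes=0,  # remove classifier nn.Linear
).eval()

# Ask timm what preprocessing the checkpoint expects; input_size here is
# (3, 448, 448), which is where the (448, 448) entry in RES_DICT below comes from.
data_config = timm.data.resolve_model_data_config(model)
transform = timm.data.create_transform(**data_config, is_training=False)

with torch.no_grad():
    feats = model(torch.randn(1, 3, 448, 448))  # pooled features; classifier removed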
@@ -660,9 +657,9 @@ class EVA02(nn.Module):
             'block': block_outputs
         }
 
-MODEL_DICT["
-LAYER_DICT["
-RES_DICT["
+MODEL_DICT["CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)"] = partial(EVA02)
+LAYER_DICT["CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)"] = 12
+RES_DICT["CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)"] = (448, 448)
 
 class CLIPConvnext(nn.Module):
     def __init__(self):
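The three tables registered above drive the UI: MODEL_DICT maps a display name to a factory (hence partial(EVA02)), LAYER_DICT gives the range of the layer slider, and RES_DICT the input resolution. A sketch of how one entry might be consumed; this is illustrative, not the app's exact plumbing:

from backbone import MODEL_DICT, LAYER_DICT, RES_DICT

name = "CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)"

model = MODEL_DICT[name]()        # partial(EVA02) -> instantiate the wrapper lazily
num_layers = LAYER_DICT[name]     # 12, the upper bound for the layer slider
height, width = RES_DICT[name]    # (448, 448), resize target for input images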
@@ -862,6 +859,8 @@ def extract_features(images, model, model_name, node_type, layer, batch_size=8):
         inp = inp.cuda()
         out = model(inp)  # {'attn': [B, H, W, C], 'mlp': [B, H, W, C], 'block': [B, H, W, C]}
         out = out[node_type]
+        if out is None:
+            raise ValueError(f"Node type {node_type} not found in model {model_name}")
         out = out[layer]
         # normalize
         out = F.normalize(out, dim=-1)
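Each backbone wrapper returns a dict keyed by 'attn', 'mlp', and 'block', and a wrapper that cannot expose a given feature can leave that entry as None, which is what the new ValueError guards against. A toy stand-in for one iteration of the loop above, with illustrative shapes:

import torch
import torch.nn.functional as F

node_type, layer = 'block', 11
model_name = 'CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)'

out = {
    'attn': None,  # e.g. a backbone that does not expose attention outputs
    'mlp': None,
    'block': [torch.randn(2, 32, 32, 768) for _ in range(12)],  # one [B, H, W, C] per layer
}

out = out[node_type]
if out is None:
    raise ValueError(f"Node type {node_type} not found in model {model_name}")
out = out[layer]                  # select one layer: [B, H, W, C]
out = F.normalize(out, dim=-1)    # unit-norm feature vectors for the downstream NCUT affinity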