JointTaggerProject-Inference-Beta-AttnVis

Sleeping

App Files Files Community

drhead committed on May 6, 2025

Commit

2c3b7a4

verified ·

1 Parent(s): 57e083a

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -21

app.py CHANGED Viewed

@@ -1,18 +1,17 @@
-import json
-import gradio as gr
 from PIL import Image
-import safetensors.torch
-import spaces
-import timm
-from timm.models import VisionTransformer
 import torch
 from torchvision.transforms import transforms
 from torchvision.transforms import InterpolationMode
 import torchvision.transforms.functional as TF
 from huggingface_hub import hf_hub_download
-import numpy as np
-import matplotlib.cm as cm
 class Fit(torch.nn.Module):
     def __init__(
@@ -147,12 +146,13 @@ cached_model = hf_hub_download(
 safetensors.torch.load_model(model, cached_model)
 model.eval()
-with open("tagger_tags.json", "r") as file:
-    tags = json.load(file) # type: dict
-allowed_tags = list(tags.keys())
-for idx, tag in enumerate(allowed_tags):
-    allowed_tags[idx] = tag.replace("_", " ")
 @spaces.GPU(duration=5)
 def run_classifier(image: Image.Image, threshold):
@@ -161,11 +161,10 @@ def run_classifier(image: Image.Image, threshold):
     with torch.no_grad():
         probits = model(tensor)[0] # type: torch.Tensor
-        values, indices = probits.topk(250)
-    tag_score = dict()
-    for i in range(indices.size(0)):
-        tag_score[allowed_tags[indices[i]]] = values[i].item()
     sorted_tag_score = dict(sorted(tag_score.items(), key=lambda item: item[1], reverse=True))
     return *create_tags(threshold, sorted_tag_score), img, sorted_tag_score
@@ -178,8 +177,9 @@ def create_tags(threshold, sorted_tag_score: dict):
 def clear_image():
     return "", {}, None, {}, None
 def cam_inference(img, threshold, alpha, evt: gr.SelectData):
-    target_tag = evt.value
     tensor = transform(img).unsqueeze(0)
     gradients = {}
@@ -191,7 +191,6 @@ def cam_inference(img, threshold, alpha, evt: gr.SelectData):
     def hook_backward(module, grad_in, grad_out):
         gradients['value'] = grad_out[0]
-    target_tag_index = allowed_tags.index(target_tag)
     handle_forward = model.norm.register_forward_hook(hook_forward)
     handle_backward = model.norm.register_full_backward_hook(hook_backward)
@@ -287,11 +286,11 @@ with gr.Blocks(css=custom_css) as demo:
     with gr.Row():
         with gr.Column():
             image = gr.Image(label="Source", sources=['upload', 'clipboard'], type='pil', show_label=False, elem_id="image_container")
-            threshold_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.20, label="Tag Threshold")
             cam_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.40, label="CAM Threshold", elem_classes="inferno-slider")
             alpha_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.60, label="CAM Alpha")
         with gr.Column():
             tag_string = gr.Textbox(label="Tag String")
             label_box = gr.Label(label="Tag Predictions", num_top_classes=250, show_label=False)
     gr.Markdown("""

 from PIL import Image
+import numpy as np
+import matplotlib.cm as cm
+import msgspec
 import torch
 from torchvision.transforms import transforms
 from torchvision.transforms import InterpolationMode
 import torchvision.transforms.functional as TF
+import timm
+from timm.models import VisionTransformer
+import safetensors.torch
+import gradio as gr
+import spaces
 from huggingface_hub import hf_hub_download
 class Fit(torch.nn.Module):
     def __init__(
 safetensors.torch.load_model(model, cached_model)
 model.eval()
+with open("tagger_tags.json", "rb") as file:
+    tags = msgspec.json.decode(file.read(), type=dict[str, int])
+for tag in tags.keys():
+    tags[tag.replace("_", " ")] = tags.pop(tag)
+allowed_tags = list(tags.keys())
 @spaces.GPU(duration=5)
 def run_classifier(image: Image.Image, threshold):
     with torch.no_grad():
         probits = model(tensor)[0] # type: torch.Tensor
+        values, indices = probits.cpu().topk(250)
+    tag_score = {allowed_tags[idx.item()]: val.item() for idx, val in zip(indices, values)}
     sorted_tag_score = dict(sorted(tag_score.items(), key=lambda item: item[1], reverse=True))
     return *create_tags(threshold, sorted_tag_score), img, sorted_tag_score
 def clear_image():
     return "", {}, None, {}, None
+@spaces.GPU(duration=5)
 def cam_inference(img, threshold, alpha, evt: gr.SelectData):
+    target_tag_index = tags[evt.value]
     tensor = transform(img).unsqueeze(0)
     gradients = {}
     def hook_backward(module, grad_in, grad_out):
         gradients['value'] = grad_out[0]
     handle_forward = model.norm.register_forward_hook(hook_forward)
     handle_backward = model.norm.register_full_backward_hook(hook_backward)
     with gr.Row():
         with gr.Column():
             image = gr.Image(label="Source", sources=['upload', 'clipboard'], type='pil', show_label=False, elem_id="image_container")
             cam_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.40, label="CAM Threshold", elem_classes="inferno-slider")
             alpha_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.60, label="CAM Alpha")
         with gr.Column():
             tag_string = gr.Textbox(label="Tag String")
+            threshold_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.20, label="Tag Threshold")
             label_box = gr.Label(label="Tag Predictions", num_top_classes=250, show_label=False)
     gr.Markdown("""