Spaces:

ura23
/

wd-tagger

Running

App Files Community

ura23 committed on Jan 25, 2025

Commit

8d32a56

verified ·

1 Parent(s): dc7b29f

Update app.py

Browse files

Files changed (1) hide show

app.py +117 -55

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import argparse
 import os
-from pathlib import Path
 import gradio as gr
 import huggingface_hub
@@ -8,12 +7,10 @@ import numpy as np
 import onnxruntime as rt
 import pandas as pd
 from PIL import Image
-from tagger.common import Heatmap, ImageLabels, LabelData, load_labels_hf, preprocess_image
-from tagger.model import load_model_and_transform, process_heatmap
-TITLE = "WaifuDiffusion Tagger with Heatmap"
 DESCRIPTION = """
-Demo for the WaifuDiffusion tagger models with heatmap and grid visualization.
 """
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
@@ -25,17 +22,34 @@ VIT_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-tagger-v3"
 VIT_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-large-tagger-v3"
 EVA02_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-eva02-large-tagger-v3"
-MODEL_REPOS = [
-    SWINV2_MODEL_DSV3_REPO,
-    CONV_MODEL_DSV3_REPO,
-    VIT_MODEL_DSV3_REPO,
-    VIT_LARGE_MODEL_DSV3_REPO,
-    EVA02_LARGE_MODEL_DSV3_REPO,
-]
 MODEL_FILENAME = "model.onnx"
 LABEL_FILENAME = "selected_tags.csv"
 class Predictor:
     def __init__(self):
         self.model_target_size = None
@@ -52,7 +66,7 @@ class Predictor:
         csv_path, model_path = self.download_model(model_repo)
         tags_df = pd.read_csv(csv_path)
-        self.tag_names, self.general_indexes, self.character_indexes = self.load_labels(tags_df)
         model = rt.InferenceSession(model_path)
         _, height, width, _ = model.get_inputs()[0].shape
@@ -60,19 +74,21 @@ class Predictor:
         self.last_loaded_repo = model_repo
         self.model = model
-    def load_labels(self, dataframe):
-        tag_names = dataframe["name"].tolist()
-        general_indexes = list(np.where(dataframe["category"] == 0)[0])
-        character_indexes = list(np.where(dataframe["category"] == 4)[0])
-        return tag_names, general_indexes, character_indexes
     def prepare_image(self, image):
         canvas = Image.new("RGBA", image.size, (255, 255, 255))
         if image.mode != "RGBA":
             image = image.convert("RGBA")
         canvas.alpha_composite(image)
         image = canvas.convert("RGB")
         max_dim = max(image.size)
         padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
         pad_left = (max_dim - image.width) // 2
@@ -80,7 +96,10 @@ class Predictor:
         padded_image.paste(image, (pad_left, pad_top))
         padded_image = padded_image.resize((self.model_target_size, self.model_target_size), Image.BICUBIC)
-        return np.expand_dims(np.asarray(padded_image, dtype=np.float32)[:, :, ::-1], axis=0)
     def predict(self, images, model_repo, general_thresh, character_thresh):
         self.load_model(model_repo)
@@ -99,58 +118,101 @@ class Predictor:
         return results
-    def generate_heatmap_and_grid(self, image, model_repo, threshold):
-        model, transform = load_model_and_transform(model_repo)
-        labels = load_labels_hf(model_repo)
-        image = preprocess_image(image, (448, 448))
-        image = transform(image).unsqueeze(0)
-        heatmaps, heatmap_grid, _ = process_heatmap(model, image, labels, threshold)
-        return [(x.image, x.label) for x in heatmaps], heatmap_grid
 def main():
     predictor = Predictor()
     with gr.Blocks(title=TITLE) as demo:
         gr.Markdown(f"<h1 style='text-align: center;'>{TITLE}</h1>")
         gr.Markdown(DESCRIPTION)
         with gr.Row():
             with gr.Column():
-                image_files = gr.File(file_types=["image"], label="Upload Images", file_count="multiple")
-                model_repo = gr.Dropdown(MODEL_REPOS, value=VIT_MODEL_DSV3_REPO, label="Select Model")
-                threshold = gr.Slider(0, 1, step=0.01, value=0.35, label="Heatmap Threshold")
-                general_thresh = gr.Slider(0, 1, step=0.05, value=0.3, label="General Tags Threshold")
-                character_thresh = gr.Slider(0, 1, step=0.05, value=1.0, label="Character Tags Threshold")
-                submit = gr.Button(value="Process Images", variant="primary")
             with gr.Column():
-                with gr.Tab(label="Tags"):
-                    output_tags = gr.Textbox(label="Output Tags", lines=10)
-                with gr.Tab(label="Heatmaps"):
-                    heatmap_gallery = gr.Gallery(label="Heatmap Gallery")
-                with gr.Tab(label="Grid"):
-                    heatmap_grid = gr.Image(label="Heatmap Grid")
-        def process_images(files, model_repo, general_thresh, character_thresh, threshold):
-            images = [Image.open(file.name) for file in files]
-            tag_results = predictor.predict(images, model_repo, general_thresh, character_thresh)
-            heatmap_results, grid_result = predictor.generate_heatmap_and_grid(images[0], model_repo, threshold)
-            tag_output = []
-            for general_tags, character_tags in tag_results:
-                general_str = ", ".join(general_tags)
-                character_str = ", ".join(character_tags)
-                tag_output.append(f"Characters: {character_str}\nGeneral: {general_str}")
-            return "\n\n".join(tag_output), heatmap_results, grid_result
         submit.click(
             process_images,
-            inputs=[image_files, model_repo, general_thresh, character_thresh, threshold],
-            outputs=[output_tags, heatmap_gallery, heatmap_grid]
         )
     demo.launch()
 if __name__ == "__main__":
-    main()

 import argparse
 import os
 import gradio as gr
 import huggingface_hub
 import onnxruntime as rt
 import pandas as pd
 from PIL import Image
+TITLE = "WaifuDiffusion Tagger"
 DESCRIPTION = """
+Demo for the WaifuDiffusion tagger models
 """
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 VIT_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-large-tagger-v3"
 EVA02_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-eva02-large-tagger-v3"
+# Dataset v2 series of models:
+MOAT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-moat-tagger-v2"
+SWIN_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-swinv2-tagger-v2"
+CONV_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
+CONV2_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-convnextv2-tagger-v2"
+VIT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-vit-tagger-v2"
+# IdolSankaku series of models:
+EVA02_LARGE_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-eva02-large-tagger-v1"
+SWINV2_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-swinv2-tagger-v1"
+# Files to download from the repos
 MODEL_FILENAME = "model.onnx"
 LABEL_FILENAME = "selected_tags.csv"
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--score-slider-step", type=float, default=0.05)
+    parser.add_argument("--score-general-threshold", type=float, default=0.3)
+    parser.add_argument("--score-character-threshold", type=float, default=1.0)
+    return parser.parse_args()
+def load_labels(dataframe) -> list[str]:
+    tag_names = dataframe["name"].tolist()
+    general_indexes = list(np.where(dataframe["category"] == 0)[0])
+    character_indexes = list(np.where(dataframe["category"] == 4)[0])
+    return tag_names, general_indexes, character_indexes
 class Predictor:
     def __init__(self):
         self.model_target_size = None
         csv_path, model_path = self.download_model(model_repo)
         tags_df = pd.read_csv(csv_path)
+        self.tag_names, self.general_indexes, self.character_indexes = load_labels(tags_df)
         model = rt.InferenceSession(model_path)
         _, height, width, _ = model.get_inputs()[0].shape
         self.last_loaded_repo = model_repo
         self.model = model
     def prepare_image(self, image):
+        # Create a white canvas with the same size as the input image
         canvas = Image.new("RGBA", image.size, (255, 255, 255))
+        # Ensure the input image has an alpha channel for compositing
         if image.mode != "RGBA":
             image = image.convert("RGBA")
+        # Composite the input image onto the canvas
         canvas.alpha_composite(image)
+        # Convert to RGB (alpha channel is no longer needed)
         image = canvas.convert("RGB")
+        # Resize the image to a square of size (model_target_size x model_target_size)
         max_dim = max(image.size)
         padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
         pad_left = (max_dim - image.width) // 2
         padded_image.paste(image, (pad_left, pad_top))
         padded_image = padded_image.resize((self.model_target_size, self.model_target_size), Image.BICUBIC)
+        # Convert the image to a NumPy array
+        image_array = np.asarray(padded_image, dtype=np.float32)[:, :, ::-1]
+        return np.expand_dims(image_array, axis=0)
     def predict(self, images, model_repo, general_thresh, character_thresh):
         self.load_model(model_repo)
         return results
 def main():
+    args = parse_args()
     predictor = Predictor()
+    model_repos = [
+        SWINV2_MODEL_DSV3_REPO,
+        CONV_MODEL_DSV3_REPO,
+        VIT_MODEL_DSV3_REPO,
+        VIT_LARGE_MODEL_DSV3_REPO,
+        EVA02_LARGE_MODEL_DSV3_REPO,
+        # ---
+        MOAT_MODEL_DSV2_REPO,
+        SWIN_MODEL_DSV2_REPO,
+        CONV_MODEL_DSV2_REPO,
+        CONV2_MODEL_DSV2_REPO,
+        VIT_MODEL_DSV2_REPO,
+        # ---
+        SWINV2_MODEL_IS_DSV1_REPO,
+        EVA02_LARGE_MODEL_IS_DSV1_REPO,
+    ]
+    predefined_tags = ["loli", "oppai_loli", "minigirl", "babydoll", "monochrome", "greyscale", "speech_bubble", "english_text", "copyright_name", "twitter_username", "artist_name", "watermark", "censored", "bar_censor", "blank_censor", "blur_censor", "light_censor", "mosaic_censoring"]  # Default tags to filter out
     with gr.Blocks(title=TITLE) as demo:
         gr.Markdown(f"<h1 style='text-align: center;'>{TITLE}</h1>")
         gr.Markdown(DESCRIPTION)
         with gr.Row():
             with gr.Column():
+                image_files = gr.File(
+                    file_types=["image"], label="Upload Images", file_count="multiple",
+                )
+                # Wrap the model selection and sliders in an Accordion
+                with gr.Accordion("Advanced Settings", open=False):  # Collapsible by default
+                    model_repo = gr.Dropdown(
+                        model_repos,
+                        value=VIT_MODEL_DSV3_REPO,
+                        label="Select Model",
+                    )
+                    general_thresh = gr.Slider(
+                        0, 1, step=args.score_slider_step, value=args.score_general_threshold, label="General Tags Threshold"
+                    )
+                    character_thresh = gr.Slider(
+                        0, 1, step=args.score_slider_step, value=args.score_character_threshold, label="Character Tags Threshold"
+                    )
+                    filter_tags = gr.Textbox(
+                        value=", ".join(predefined_tags),
+                        label="Filter Tags (comma-separated)",
+                        placeholder="Add tags to filter out (e.g., winter, red, from above)",
+                        lines=3
+                    )
+                submit = gr.Button(
+                    value="Process Images", variant="primary"
+                )
             with gr.Column():
+                output = gr.Textbox(label="Output", lines=10)
+        def process_images(files, model_repo, general_thresh, character_thresh, filter_tags):
+            images = [Image.open(file.name) for file in files]
+            results = predictor.predict(images, model_repo, general_thresh, character_thresh)
+            # Parse filter tags
+            filter_set = set(tag.strip().lower() for tag in filter_tags.split(","))
+            # Generate formatted output
+            prompts = []
+            for i, (general_tags, character_tags) in enumerate(results):
+                # Replace underscores with spaces for both character and general tags
+                character_part = ", ".join(
+                    tag.replace('_', ' ') for tag in character_tags if tag.lower() not in filter_set
+                )
+                general_part = ", ".join(
+                    tag.replace('_', ' ') for tag in general_tags if tag.lower() not in filter_set
+                )
+                # Construct the prompt based on the presence of character_part
+                if character_part:
+                    prompts.append(f"{character_part}, {general_part}")
+                else:
+                    prompts.append(general_part)
+            # Join all prompts with blank lines
+            return "\n\n".join(prompts)
         submit.click(
             process_images,
+            inputs=[image_files, model_repo, general_thresh, character_thresh, filter_tags],
+            outputs=output
         )
+    demo.queue(max_size=10)
     demo.launch()
 if __name__ == "__main__":
+    main()