Update app.py
app.py
CHANGED
@@ -17,12 +17,21 @@ HF_TOKEN = os.environ.get("HF_TOKEN", "")
 
 # Dataset v3 series of models:
 SWINV2_MODEL_DSV3_REPO = "SmilingWolf/wd-swinv2-tagger-v3"
+CONV_MODEL_DSV3_REPO = "SmilingWolf/wd-convnext-tagger-v3"
+VIT_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-tagger-v3"
+VIT_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-large-tagger-v3"
+EVA02_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-eva02-large-tagger-v3"
 
 # Dataset v2 series of models:
 MOAT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-moat-tagger-v2"
+SWIN_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-swinv2-tagger-v2"
+CONV_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
+CONV2_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-convnextv2-tagger-v2"
+VIT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-vit-tagger-v2"
 
 # IdolSankaku series of models:
 EVA02_LARGE_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-eva02-large-tagger-v1"
+SWINV2_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-swinv2-tagger-v1"
 
 # Files to download from the repos
 MODEL_FILENAME = "model.onnx"
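Each of these constants is a Hugging Face Hub repo id, and MODEL_FILENAME is fetched from the selected repo at load time. A minimal sketch of that resolution, assuming the standard huggingface_hub and onnxruntime packages that app.py appears to rely on (the printed shape is illustrative):

# Hypothetical standalone sketch; the Space's own load_model() does the
# equivalent. hf_hub_download caches the file and returns its local path.
from huggingface_hub import hf_hub_download
import onnxruntime as ort

SWINV2_MODEL_DSV3_REPO = "SmilingWolf/wd-swinv2-tagger-v3"
MODEL_FILENAME = "model.onnx"

model_path = hf_hub_download(repo_id=SWINV2_MODEL_DSV3_REPO, filename=MODEL_FILENAME)
session = ort.InferenceSession(model_path)
print(session.get_inputs()[0].shape)  # e.g. a square NHWC input for these taggers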
@@ -41,25 +50,6 @@ def load_labels(dataframe) -> list[str]:
     character_indexes = list(np.where(dataframe["category"] == 4)[0])
     return tag_names, general_indexes, character_indexes
 
-def parse_replacements(replacement_text):
-    replacements = {}
-    for line in replacement_text.strip().split("\n"):
-        parts = line.split("->")
-        if len(parts) == 2:
-            old_tags = tuple(tag.strip().lower() for tag in parts[0].split(","))
-            new_tags = [tag.strip() for tag in parts[1].split(",")]
-            replacements[old_tags] = new_tags
-    return replacements
-
-def apply_replacements(tags, replacements):
-    modified_tags = set(tags)
-    for old_tags, new_tags in replacements.items():
-        if all(tag in modified_tags for tag in old_tags):
-            for tag in old_tags:
-                modified_tags.discard(tag)
-            modified_tags.update(new_tags)
-    return list(modified_tags)
-
 class Predictor:
     def __init__(self):
         self.model_target_size = None
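The two deleted helpers implemented the tag-replacement feature this commit removes: parse_replacements turned each textbox line of the form `old1, old2 -> new1, new2` into a mapping, and apply_replacements fired a rule only when every left-hand tag was present. A small worked example of the removed behavior, assuming the two functions shown above are still in scope (the tag strings are illustrative):

# Behavior of the removed helpers, reproduced from the old code above.
rules = parse_replacements("1girl, solo -> girl\nlong hair -> flowing hair")
# rules == {("1girl", "solo"): ["girl"], ("long hair",): ["flowing hair"]}

tags = ["1girl", "solo", "smile"]
print(apply_replacements(tags, rules))
# Both "1girl" and "solo" are present, so the rule fires and they are
# replaced by "girl"; "long hair" is absent, so that rule is skipped.
# Order is not preserved because the helper works on a set.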
@@ -85,10 +75,20 @@ class Predictor:
         self.model = model
 
     def prepare_image(self, image):
+        # Create a white canvas with the same size as the input image
+        canvas = Image.new("RGBA", image.size, (255, 255, 255))
+
+        # Ensure the input image has an alpha channel for compositing
         if image.mode != "RGBA":
             image = image.convert("RGBA")
- …
+
+        # Composite the input image onto the canvas
+        canvas.alpha_composite(image)
+
+        # Convert to RGB (the alpha channel is no longer needed)
+        image = canvas.convert("RGB")
 
+        # Pad to a square, then resize to (model_target_size x model_target_size)
         max_dim = max(image.size)
         padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
         pad_left = (max_dim - image.width) // 2
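The rewritten prepare_image composites the input onto a white canvas first, so fully transparent pixels resolve to white rather than to whatever RGB values they happen to carry. A self-contained sketch of the same composite, pad, and resize pipeline (448 stands in for model_target_size, whose actual value depends on the loaded model):

import numpy as np
from PIL import Image

target = 448  # stand-in for self.model_target_size

# A 100x60 RGBA image that is fully transparent: compositing should yield pure white.
image = Image.new("RGBA", (100, 60), (255, 0, 0, 0))

canvas = Image.new("RGBA", image.size, (255, 255, 255))
canvas.alpha_composite(image)
rgb = canvas.convert("RGB")

# Center the image on a white square, then do a distortion-free resize.
max_dim = max(rgb.size)
padded = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
padded.paste(rgb, ((max_dim - rgb.width) // 2, (max_dim - rgb.height) // 2))
padded = padded.resize((target, target), Image.BICUBIC)

print(padded.size)              # (448, 448)
print(padded.getpixel((0, 0)))  # (255, 255, 255): transparency became white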
@@ -96,12 +96,15 @@ class Predictor:
         padded_image.paste(image, (pad_left, pad_top))
         padded_image = padded_image.resize((self.model_target_size, self.model_target_size), Image.BICUBIC)
 
+        # Convert to a float32 NumPy array and flip RGB -> BGR channel order
         image_array = np.asarray(padded_image, dtype=np.float32)[:, :, ::-1]
         return np.expand_dims(image_array, axis=0)
 
+
     def predict(self, images, model_repo, general_thresh, character_thresh):
         self.load_model(model_repo)
         results = []
+
         for image in images:
             image = self.prepare_image(image)
             input_name = self.model.get_inputs()[0].name
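One detail worth calling out in the hunk above: `[:, :, ::-1]` reverses the channel axis, turning PIL's RGB order into BGR, and expand_dims adds a leading batch axis, giving the NHWC layout the ONNX graph consumes (BGR input is an assumption to verify against whichever model you load). A tiny check:

import numpy as np

# One pure-red RGB pixel, shape (1, 1, 3)
rgb = np.array([[[255.0, 0.0, 0.0]]], dtype=np.float32)

bgr = rgb[:, :, ::-1]           # channel axis reversed: now (B, G, R)
batch = np.expand_dims(bgr, 0)  # shape (1, 1, 1, 3): NHWC with batch size 1

print(bgr[0, 0])    # [  0.   0. 255.] -- red moved to the last (R) slot
print(batch.shape)  # (1, 1, 1, 3)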
@@ -112,39 +115,142 @@ class Predictor:
         general_res = [x[0] for i, x in enumerate(labels) if i in self.general_indexes and x[1] > general_thresh]
         character_res = [x[0] for i, x in enumerate(labels) if i in self.character_indexes and x[1] > character_thresh]
         results.append((general_res, character_res))
+
         return results
 
-def …
-    …
-    filter_set = set(tag.strip().lower() for tag in filter_tags.split(","))
-    replacements = parse_replacements(replacement_text)
-
-    prompts = []
-    for general_tags, character_tags in results:
-        character_tags = apply_replacements([tag.replace("_", " ") for tag in character_tags if tag.lower() not in filter_set], replacements)
-        general_tags = apply_replacements([tag.replace("_", " ") for tag in general_tags if tag.lower() not in filter_set], replacements)
-        prompt = ", ".join(character_tags + general_tags)
-        prompts.append(prompt)
-
-    return "\n\n".join(prompts)
+def main():
+    args = parse_args()
+    predictor = Predictor()
 
- …
+    model_repos = [
+        SWINV2_MODEL_DSV3_REPO,
+        CONV_MODEL_DSV3_REPO,
+        VIT_MODEL_DSV3_REPO,
+        VIT_LARGE_MODEL_DSV3_REPO,
+        EVA02_LARGE_MODEL_DSV3_REPO,
+        # ---
+        MOAT_MODEL_DSV2_REPO,
+        SWIN_MODEL_DSV2_REPO,
+        CONV_MODEL_DSV2_REPO,
+        CONV2_MODEL_DSV2_REPO,
+        VIT_MODEL_DSV2_REPO,
+        # ---
+        SWINV2_MODEL_IS_DSV1_REPO,
+        EVA02_LARGE_MODEL_IS_DSV1_REPO,
+    ]
 
- …
+    predefined_tags = [
+        "loli",
+        "oppai_loli",
+        "onee-shota",
+        "incest",
+        "furry",
+        "furry_female",
+        "shota",
+        "male_focus",
+        "signature",
+        "lolita_hairband",
+        "otoko_no_ko",
+        "minigirl",
+        "patreon_username",
+        "babydoll",
+        "monochrome",
+        "happy_birthday",
+        "happy_new_year",
+        "dated",
+        "thought_bubble",
+        "greyscale",
+        "speech_bubble",
+        "english_text",
+        "copyright_name",
+        "twitter_username",
+        "patreon username",
+        "patreon logo",
+        "cover",
+        "content_rating",
+        "cover_page",
+        "doujin_cover",
+        "sex",
+        "artist_name",
+        "watermark",
+        "censored",
+        "bar_censor",
+        "blank_censor",
+        "blur_censor",
+        "light_censor",
+        "mosaic_censoring",
+    ]
+
+    with gr.Blocks(title=TITLE) as demo:
+        gr.Markdown(f"<h1 style='text-align: center;'>{TITLE}</h1>")
+        gr.Markdown(DESCRIPTION)
+
+        with gr.Row():
+            with gr.Column():
+                image_files = gr.File(
+                    file_types=["image"], label="Upload Images", file_count="multiple",
+                )
+
+                # Wrap the model selection and sliders in an Accordion (collapsed by default)
+                with gr.Accordion("Advanced Settings", open=False):
+                    model_repo = gr.Dropdown(
+                        model_repos,
+                        value=VIT_MODEL_DSV3_REPO,
+                        label="Select Model",
+                    )
+                    general_thresh = gr.Slider(
+                        0, 1, step=args.score_slider_step, value=args.score_general_threshold, label="General Tags Threshold"
+                    )
+                    character_thresh = gr.Slider(
+                        0, 1, step=args.score_slider_step, value=args.score_character_threshold, label="Character Tags Threshold"
+                    )
+                    filter_tags = gr.Textbox(
+                        value=", ".join(predefined_tags),
+                        label="Filter Tags (comma-separated)",
+                        placeholder="Add tags to filter out (e.g., winter, red, from above)",
+                        lines=3,
+                    )
+
+                submit = gr.Button(
+                    value="Process Images", variant="primary"
+                )
+
+            with gr.Column():
+                output = gr.Textbox(label="Output", lines=10)
+
+        def process_images(files, model_repo, general_thresh, character_thresh, filter_tags):
+            images = [Image.open(file.name) for file in files]
+            results = predictor.predict(images, model_repo, general_thresh, character_thresh)
+
+            # Parse filter tags
+            filter_set = set(tag.strip().lower() for tag in filter_tags.split(","))
+
+            # Generate formatted output
+            prompts = []
+            for general_tags, character_tags in results:
+                # Replace underscores with spaces for both character and general tags
+                character_part = ", ".join(
+                    tag.replace("_", " ") for tag in character_tags if tag.lower() not in filter_set
+                )
+                general_part = ", ".join(
+                    tag.replace("_", " ") for tag in general_tags if tag.lower() not in filter_set
+                )
+
+                # Construct the prompt based on the presence of character_part
+                if character_part:
+                    prompts.append(f"{character_part}, {general_part}")
+                else:
+                    prompts.append(general_part)
+
+            # Join all prompts with blank lines
+            return "\n\n".join(prompts)
+
+        submit.click(
+            process_images,
+            inputs=[image_files, model_repo, general_thresh, character_thresh, filter_tags],
+            outputs=output,
+        )
+
+    demo.queue(max_size=10)
     demo.launch()
+
+if __name__ == "__main__":
+    main()
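Since process_images is buried in UI wiring, here is its filtering and formatting logic distilled into a pure function with illustrative data (format_prompt and the sample tags are hypothetical, not part of app.py):

def format_prompt(general_tags, character_tags, filter_set):
    """Mirror of the formatting logic inside process_images (sketch)."""
    character_part = ", ".join(
        tag.replace("_", " ") for tag in character_tags if tag.lower() not in filter_set
    )
    general_part = ", ".join(
        tag.replace("_", " ") for tag in general_tags if tag.lower() not in filter_set
    )
    # Character tags lead the prompt when any survive the filter.
    return f"{character_part}, {general_part}" if character_part else general_part

filter_set = {"signature", "watermark"}
print(format_prompt(["1girl", "long_hair", "signature"], ["hatsune_miku"], filter_set))
# -> "hatsune miku, 1girl, long hair"

One caveat: the membership test runs on the raw tag before underscores are replaced, so filter entries written with spaces, such as "patreon username" in the predefined list, will not match tags that use underscores.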