Spaces:

SDPrototypeTools
/

StyleSquirrel

Sleeping

App Files Files Community

Food Desert committed on Aug 21, 2025

Commit

c5ae7ff

1 Parent(s): 11f3316

UI polish: cards + tables; clickable tags; cleanup

Browse files

Files changed (2) hide show

README.md +14 -0
app.py +122 -34

README.md CHANGED Viewed

@@ -16,3 +16,17 @@ Drop or paste an image → get style tags and see nearest training images.
 - Trained projector maps to “style space”
 - FAISS finds nearest training images
 - We tally their style tags and normalize to scores in [0,1]

 - Trained projector maps to “style space”
 - FAISS finds nearest training images
 - We tally their style tags and normalize to scores in [0,1]
+---
+# StyleSquirrel
+Nearest-neighbor style tagger demo.
+Drag an image to see predicted style tags and similar training images.
+⚠️ **Note on large model files**
+If you clone this repository locally, you must pull the big model and FAISS index files with Git LFS before running:
+```bash
+git lfs install
+git lfs pull
+```

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import gradio as gr
 from PIL import Image
 from typing import Dict, Tuple
 # 🔧 Your model lives here.
 # Implement load() and predict(image) in model.py.
@@ -14,65 +15,152 @@ def _format_outputs(scores: Dict[str, float], neighbors: list, threshold: float)
     tag_string = ", ".join(filtered.keys())
     return tag_string, filtered, "\n".join(neighbors)
-def infer(image: Image.Image, threshold: float):
     if image is None:
-        return "", {}, ""
-    # model.predict now returns (scores_norm, neighbors, counts_raw)
     scores, neighbors, counts = model.predict(image)
-    # Sort and threshold the display dict
     sorted_scores = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
-    filtered = {k: float(v) for k, v in sorted_scores if v >= threshold}
-    tag_text = ", ".join(filtered.keys())
-    # Pretty-print neighbors. Each neighbor dict has:
-    # { "filename": str, "similarity": float, "distance": float, "styles": [str, ...] }
-    lines = []
-    for i, d in enumerate(neighbors, 1):
-        styles_str = ", ".join(d.get("styles", []))
-        sim = d.get("similarity", None)
-        dist = d.get("distance", None)
-        if sim is not None and dist is not None:
-            lines.append(f"{i}. {d['filename']}  sim={sim:.3f}  dist={dist:.3f}  styles=[{styles_str}]")
         else:
-            # (just in case similarity/distance are not present)
-            lines.append(f"{i}. {d['filename']}  styles=[{styles_str}]")
-    neighbors_text = "\n".join(lines) if lines else "(neighbors unavailable)"
-    # Return three outputs: tag text, scores dict, and neighbors textbox text
-    return tag_text, filtered, neighbors_text
 def clear_outputs():
-    return "", {}, ""
 custom_css = '''
 #image_container-image { width: 100%; aspect-ratio: 1 / 1; max-height: 100%; }
 #image_container img { object-fit: contain !important; }
 '''
 with gr.Blocks(css=custom_css) as demo:
     gr.Markdown("## Style Tagger — Skeleton (local dev first)")
     with gr.Row():
         with gr.Column():
-            image = gr.Image(label="Drop an image here", sources=["upload", "clipboard"], type="pil", show_label=False, elem_id="image_container")
         with gr.Column():
-            threshold = gr.Slider(0.0, 1.0, value=0.01, step=0.01, label="Confidence threshold")
-            tag_text = gr.Textbox(label="Style tags (comma-separated)")
-            tag_scores = gr.Label(label="Scores", num_top_classes=250, show_label=False)
-            neighbors_text = gr.Textbox(label="Nearest training images", lines=8, interactive=False)
-    image.upload(fn=infer, inputs=[image, threshold], outputs=[tag_text, tag_scores, neighbors_text], show_progress="minimal")
-    image.clear(fn=clear_outputs, inputs=[], outputs=[tag_text, tag_scores, neighbors_text])
-    threshold.input(fn=infer, inputs=[image, threshold], outputs=[tag_text, tag_scores, neighbors_text], show_progress="hidden")
     gr.Markdown("""
     ---
     ### Instructions
     - Drop an image in the box on the left.
-    - Tags that are stylistically similar are returned, along with some statistics about them.
-    - I tried to isolate style from topic and was only partly successful.  So many reported tags might be topically rather than stylistically similar.
     """)

 import gradio as gr
 from PIL import Image
 from typing import Dict, Tuple
+import re
 # 🔧 Your model lives here.
 # Implement load() and predict(image) in model.py.
     tag_string = ", ".join(filtered.keys())
     return tag_string, filtered, "\n".join(neighbors)
+def infer(image: Image.Image):
     if image is None:
+        return "", ""  # (tag_panel_md, neighbors_md)
+    threshold = 0.01  # fixed cutoff
+    # Lazy-load if needed
+    if not getattr(model, "_READY", False):
+        try:
+            model.load()
+        except Exception as e:
+            print("model.load() during infer failed:", e)
+    # Predict
     scores, neighbors, counts = model.predict(image)
+    # Sort & threshold
     sorted_scores = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
+    filtered = [(k, float(v)) for k, v in sorted_scores if v >= threshold]
+    # ---------- Style Tags: HTML table (link | % right-aligned) ----------
+    if filtered:
+        rows = []
+        for tag, val in filtered:
+            pct = int(round(val * 100))
+            tag_q = tag.replace(" ", "_")
+            url = f"https://e621.net/posts?tags=order%3Afavcount+-animated+{tag_q}"
+            rows.append(
+                f"<tr>"
+                f"<td class='tag-name'><a href='{url}' target='_blank' rel='noopener noreferrer'>{tag}</a></td>"
+                f"<td class='tag-pct'>{pct}%</td>"
+                f"</tr>"
+            )
+        tag_panel_md = "<table class='tag-table'><tbody>" + "".join(rows) + "</tbody></table>"
+    else:
+        tag_panel_md = "_(no tags)_"
+    # ---------- Nearest Neighbors: HTML table (ID | Styles | sim; no 'dist') ----------
+    rows = []
+    for item in neighbors:
+        if isinstance(item, dict):
+            fname  = str(item.get("filename", ""))
+            sim    = item.get("similarity", None)
+            styles = item.get("styles", [])
         else:
+            fname  = str(item)
+            sim    = None
+            styles = []
+        # numeric ID (strip ".png", etc.); link to e621 if we find one
+        m = re.search(r"(\d+)", fname)
+        post_id = m.group(1) if m else fname
+        id_cell = (
+            f"<a href='https://e621.net/posts/{post_id}' target='_blank' rel='noopener noreferrer'>{post_id}</a>"
+            if m else post_id
+        )
+        styles_cell = ", ".join(styles)
+        sim_cell = f"{sim:.3f}" if sim is not None else ""
+        rows.append(
+            f"<tr>"
+            f"<td class='nn-id'>{id_cell}</td>"
+            f"<td class='nn-styles'>{styles_cell}</td>"
+            f"<td class='nn-sim'>{sim_cell}</td>"
+            f"</tr>"
+        )
+    if rows:
+        neighbors_md = (
+            "<table class='nn-table'>"
+            "<thead><tr>"
+            "<th class='nn-id'>ID</th>"
+            "<th class='nn-styles'>Styles</th>"
+            "<th class='nn-sim'>sim</th>"
+            "</tr></thead>"
+            "<tbody>" + "".join(rows) + "</tbody></table>"
+        )
+    else:
+        neighbors_md = "_(neighbors unavailable)_"
+    return tag_panel_md, neighbors_md
 def clear_outputs():
+    return "", ""
 custom_css = '''
 #image_container-image { width: 100%; aspect-ratio: 1 / 1; max-height: 100%; }
 #image_container img { object-fit: contain !important; }
+/* card look for right-side panels */
+.custom-card {
+  background: rgba(255,255,255,0.05);   /* lighter than dark bg */
+  border: 1px solid rgba(255,255,255,0.14);
+  border-radius: 12px;
+  padding: 12px 14px;
+}
+.custom-card .prose { margin: 0; }       /* tighter Markdown spacing */
+.custom-card h3 { margin-top: 0; }       /* keep section title snug */
+.custom-card:hover { box-shadow: 0 6px 20px rgba(0,0,0,0.25); }
+.nn-table { width: 100%; border-collapse: collapse; }
+.nn-table th, .nn-table td { padding: 4px 8px; vertical-align: middle; }
+.nn-table th { text-align: left; font-weight: 600; }
+.nn-table .nn-id { width: 1%; white-space: nowrap; }
+.nn-table .nn-sim { text-align: right; width: 1%; white-space: nowrap; }
+.tag-table { width: 100%; border-collapse: collapse; }
+.tag-table td { padding: 4px 8px; vertical-align: middle; }
+.tag-table .tag-name { text-align: left; }
+.tag-table .tag-pct { text-align: right; width: 1%; white-space: nowrap; }
 '''
 with gr.Blocks(css=custom_css) as demo:
     gr.Markdown("## Style Tagger — Skeleton (local dev first)")
     with gr.Row():
         with gr.Column():
+            image = gr.Image(label="Drop an image here", sources=["upload", "clipboard"],
+                             type="pil", show_label=False, elem_id="image_container")
+        # NEW: one right-side column that contains both cards stacked
         with gr.Column():
+            with gr.Column(elem_classes=["custom-card"]):
+                gr.Markdown("### Style Tags")
+                tag_panel = gr.Markdown()
+            with gr.Column(elem_classes=["custom-card"]):
+                gr.Markdown("### Nearest Neighbors")
+                neighbors_text = gr.Markdown()
+    image.upload(fn=infer, inputs=[image], outputs=[tag_panel, neighbors_text], show_progress="minimal")
+    image.clear(fn=clear_outputs, inputs=[], outputs=[tag_panel, neighbors_text])
     gr.Markdown("""
     ---
     ### Instructions
     - Drop an image in the box on the left.
+    - The "Style Tags" panel reports on tags that are stylistically similar to the query image.
+    - The "Nearest Neighbors" panel reports on e621 images that are stylistically similar to the query image.
+    ### Notes
+    - Links go to e621.net and may not be safe for work.
+    - I tried to isolate style from topic and was only partly successful.  So many reported tags and images might be topically rather than stylistically similar.
+    - The similarity metric is currently a bit naive, leading to irregularities like the "simple_background" tag being overreported due to its frequency.
     """)