Spaces: Food Desert (Running)

Food Desert committed · Commit 1136048 · 1 Parent(s): 41d10ff
Update UI (scroll cues, tooltips, model-specific tags, enter-to-run) and fixes

Browse files:
- .gitignore        +6 −0
- app.py          +689 −122
- requirements.txt   +1 −1
.gitignore (ADDED)

@@ -0,0 +1,6 @@
+.venv/
+__pycache__/
+*.pyc
+*.log
+*.tmp
+.DS_Store
app.py (CHANGED)

--- app.py (old version): removed lines per hunk; a bare "-" or a line cut off mid-statement was not captured in full. Unchanged context lines appear with the new version below.

@@ -23,9 +23,28 @@ import itertools
-logging
@@ -35,7 +54,7 @@ faq_content="""
-This tool serves as a linguistic bridge to the e621 image board tag lexicon, on which many popular models such as Fluffyrock,
@@ -115,16 +134,123 @@ Each subsequent row of images was generated using the same process, but with a d
-css = """
-.scrollable-content
-
-
@@ -139,6 +265,127 @@ plain: /([^,\\\[\]():|]|\\.)+/
@@ -160,7 +407,38 @@ def remove_special_tags(original_string):
-
@@ -182,7 +460,15 @@ def load_model_components(file_path):
-
@@ -218,18 +504,41 @@ def is_artist(name):
-
-
-
-
-
-
@@ -321,7 +630,7 @@ def create_html_tables_for_tags(subtable_heading, item_heading, word_similarity_
-        word_with_escaped_parentheses = word
@@ -329,7 +638,10 @@
-            tag_element =
@@ -341,36 +653,34 @@
-    # Add a heading above the table
-    html_str += "<h1>Top Artists</h1>"
-    # Start the table with increased font size and no borders between rows
-
-
-        similarity_percentage = "{:.1f}%".format(score * 100)
-        html_str +=
-    # Close the table HTML
-
-def construct_pseudo_vector(pseudo_doc_terms,
-
-
-
-
-
-
-
-
-
-    return pseudo_vector.reshape(1, -1)
@@ -388,36 +698,32 @@ def get_tfidf_reduced_similar_tags(pseudo_doc_terms, allow_nsfw_tags):
-    reduced_matrix = tf_idf_components['reduced_matrix']
-    #
-    #
-
-
-    # Compute cosine similarities in the reduced space
-    cosine_similarities_reduced = cosine_similarity(reduced_pseudo_vector, reduced_matrix).flatten()
-    #
-
-
-
-
-        tag_similarity_dict = {
-
-    tag_similarity_dict = {"by " + tag if is_artist(tag) else tag: sim for tag, sim in tag_similarity_dict.items()}
-    # Sort
-        (key.replace('_', ' ').replace('(', '\\(').replace(')', '\\)'),
-        for key,
-
@@ -463,22 +769,62 @@ def find_similar_tags(test_tags, tag_to_context_similarity, context_similarity_w
-
-
-
-                result.append(modified_tag_for_search.replace('_',' '), 1)
@@ -503,27 +849,47 @@
-        #Remove NSFW tags if appropriate.
-            result = [(
-
-        #
-
-
-
-
-    # If no tags were processed, add a message
-    return html_content, bad_entities, known_entities_in_prompt # Return list of lists for Dataframe
@@ -581,57 +947,91 @@ def augment_bad_entities_with_regex(text):
-        "Double Comma": "One comma between tags is considered ample."
-        "Unknown Tag": ("white", "red"),
-        "Duplicate": ("black", "yellow"),
-        "
-        "
-        "
-
-    #
-
-    combined_entities = sorted(combined_entities, key=lambda x: x['start'],reverse=True)
-
-    # Generate HTML for the main text
-
-
-
-
-            wiki_url = entity.get(
-            count = entity
-            wiki_entry = entity.get(
-            sanitized_wiki_entry = escape_html(wiki_entry) if wiki_entry else
-
-
-            html_part =
-
-            html_part = f'<span style="background-color: {
-
-    #
-    used_labels =
-    for label,
-
-
-    return f'<div style="padding: 10px; font-size: 16px;">{html_text}</div>{color_key_html}'
@@ -648,8 +1048,9 @@ def find_similar_artists(original_tags_string, top_n, context_similarity_weight,
-
-
@@ -660,40 +1061,130 @@
-
-        suggested_tags_filtered = OrderedDict(
-
-
-
-
-
-                baseline, artists = generate_artist_image_tuples([name
-    except ParseError
-
-            with gr.Column(scale=3):
-                image_tags = gr.Textbox(
-
@@ -708,23 +1199,92 @@ with gr.Blocks(css=css) as app:
-                allow_nsfw = gr.Checkbox(label="Allow NSFW
-                    unseen_tags = gr.HTML(
-                    suggested_tags = gr.HTML(
-                        top_artists = gr.HTML(
-                    styles = gr.Gallery(
@@ -732,6 +1292,13 @@
+++ app.py (new version): context and added lines

@@ -23,9 +23,28 @@ import itertools
 from itertools import islice
 from pathlib import Path
 import logging
+import hnswlib
+import pathlib
+from collections import Counter
 
 # Set up logging
+# Minimal prod logging: warnings+ to stderr, no file by default
+import os, logging
+
+LOG_LEVEL = os.environ.get("PSQ_LOG_LEVEL", "WARNING").upper()
+logging.basicConfig(
+    level=getattr(logging, LOG_LEVEL, logging.WARNING),
+    format="%(asctime)s %(levelname)s:%(message)s",
+    handlers=[logging.StreamHandler()]  # no file -> avoids huge logs on Spaces
+)
+
+# Quiet down common noisy libs (optional)
+for _name in ("gensim", "gradio", "hnswlib", "httpx", "uvicorn"):
+    logging.getLogger(_name).setLevel(logging.ERROR)
+
+# Turn off Gradio analytics phone-home to avoid those background thread errors (optional)
+os.environ["GRADIO_ANALYTICS_ENABLED"] = "0"
+
 
 
 faq_content="""
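The log level above is taken from the PSQ_LOG_LEVEL environment variable with a WARNING fallback, so verbose output is opt-in. A hypothetical way to turn on debug logging for a local run, set before this module is imported:

    import os
    os.environ["PSQ_LOG_LEVEL"] = "DEBUG"   # picked up by the basicConfig block above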
@@ -35,7 +54,7 @@ faq_content="""
 
 Since Stable Diffusion's initial release in 2022, users have developed a myriad of fine-tuned text to image models, each with unique "linguistic" preferences depending on the data from which it was fine-tuned.
 Some models react best when prompted with verbose scene descriptions akin to DALL-E, while others fine-tuned on images scraped from popular image boards understand those boards' tag sets.
+This tool serves as a linguistic bridge to the e621 image board tag lexicon, on which many popular models such as Fluffyrock, NoobAI, and Pony Diffusion v6 were trained.
 
 When you enter a txt2img prompt and press the "submit" button, Prompt Squirrel parses your prompt and checks that all your tags are valid e621 tags.
 If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unknown Tags" section.
@@ -115,16 +134,123 @@ Each subsequent row of images was generated using the same process, but with a d
 See SamplePrompts.csv for the list of prompts used and their descriptions.
 """
 
+TOOLTIP_NOTE_HTML = '<div class="hover-hint">Underlined items can be hovered for more info.</div>'
+
+HOVER_HINT_CSS = """
+/* Solid, visible underline for tagged items */
+.gradio-container .hover-underline{
+  text-decoration-line: underline !important;
+  text-decoration-thickness: 2px;
+  text-underline-offset: 2px;
+}
+
+/* Small, subtle hint text */
+.hover-hint{
+  font-size: 12px;
+  opacity: .85;
+  line-height: 1.2;
+}
+
+/* Wrapper to position the hint in the bottom-right of the annotated box */
+.annotated-wrap{ position: relative; }
+.annotated-wrap .hover-hint{
+  position: absolute;
+  right: 6px;
+  bottom: 6px;
+  text-align: right;
+}
+"""
+
+
+
+try:
+    from gradio_client import utils as _gc_utils
+
+    _orig_get_type = _gc_utils.get_type
+    _orig_j2p = _gc_utils._json_schema_to_python_type
+    _orig_pub = _gc_utils.json_schema_to_python_type
+
+    def _get_type_safe(schema):
+        # Sometimes schema is a bare True/False (JSON Schema boolean form)
+        if not isinstance(schema, dict):
+            return "any"
+        return _orig_get_type(schema)
+
+    def _j2p_safe(schema, defs=None):
+        # Accept non-dict schemas (True/False/None) and treat as "any"
+        if not isinstance(schema, dict):
+            return "any"
+        return _orig_j2p(schema, defs or schema.get("$defs"))
+
+    def _pub_safe(schema):
+        # Public wrapper used by Gradio; keep it resilient too
+        if not isinstance(schema, dict):
+            return "any"
+        return _j2p_safe(schema, schema.get("$defs"))
+
+    _gc_utils.get_type = _get_type_safe
+    _gc_utils._json_schema_to_python_type = _j2p_safe
+    _gc_utils.json_schema_to_python_type = _pub_safe
+
+except Exception as e:
+    print("gradio_client hotfix not applied:", e)
+# -------------------------------------------------------------------------------
+
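The hotfix above monkey-patches gradio_client's schema-to-type helpers so that bare-boolean JSON Schemas no longer raise. A minimal sketch (not part of the commit) of the behavior it is meant to guarantee:

    # Hypothetical check: with the patch applied, a boolean schema maps to "any".
    from gradio_client import utils as _gc_utils
    assert _gc_utils.json_schema_to_python_type(True) == "any"
    assert _gc_utils.get_type(False) == "any"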
|
| 200 |
nsfw_threshold = 0.95 # Assuming the threshold value is defined here
|
| 201 |
|
| 202 |
+
css = HOVER_HINT_CSS + """
|
| 203 |
+
.scrollable-content{
|
| 204 |
+
max-height: 420px;
|
| 205 |
+
overflow-y: scroll; /* always show scrollbar */
|
| 206 |
+
overflow-x: hidden;
|
| 207 |
+
padding-right: 8px;
|
| 208 |
+
padding-bottom: 14px; /* <— add this */
|
| 209 |
+
scrollbar-gutter: stable; /* prevent layout shift as it fills */
|
| 210 |
+
|
| 211 |
+
/* Firefox */
|
| 212 |
+
scrollbar-width: auto;
|
| 213 |
+
scrollbar-color: rgba(180,180,180,.9) rgba(0,0,0,.15);
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
/* WebKit/Chromium (Chrome/Edge/Safari) */
|
| 217 |
+
.scrollable-content::-webkit-scrollbar{ width: 10px; }
|
| 218 |
+
.scrollable-content::-webkit-scrollbar-thumb{ background: rgba(180,180,180,.9); border-radius: 8px; }
|
| 219 |
+
.scrollable-content::-webkit-scrollbar-track{ background: rgba(0,0,0,.15); }
|
| 220 |
+
|
| 221 |
+
/* --- Fade that blends into the pane background, no chip --- */
|
| 222 |
+
.scroll-fade {
|
| 223 |
+
position: relative;
|
| 224 |
+
/* ensure our ::after overlay paints above children */
|
| 225 |
+
isolation: isolate;
|
| 226 |
}
|
| 227 |
+
|
| 228 |
+
.scroll-fade::after{
|
| 229 |
+
content: "";
|
| 230 |
+
position: absolute;
|
| 231 |
+
left: 0; right: 0; bottom: 0;
|
| 232 |
+
height: 20px; /* a hair taller; tweak if you like */
|
| 233 |
+
pointer-events: none;
|
| 234 |
+
/* transparent → panel background (Gradio theme var, with dark fallback) */
|
| 235 |
+
background: linear-gradient(
|
| 236 |
+
to bottom,
|
| 237 |
+
rgba(0,0,0,0),
|
| 238 |
+
var(--background-fill-secondary, #1f2937)
|
| 239 |
+
);
|
| 240 |
+
transition: opacity .18s ease;
|
| 241 |
+
z-index: 3; /* sit above the scroller’s content */
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
.scroll-fade.at-bottom::after { opacity: 0; }
|
| 245 |
+
|
| 246 |
+
/* no chip */
|
| 247 |
+
.scroll-fade::before { content: none; }
|
| 248 |
+
|
| 249 |
"""
|
| 250 |
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
#Parser
|
| 254 |
grammar=r"""
|
| 255 |
!start: (prompt | /[][():]/+)*
|
| 256 |
prompt: (emphasized | plain | comma | WHITESPACE)*
|
|
|
|
@@ -139,6 +265,127 @@ plain: /([^,\\\[\]():|]|\\.)+/
 # Initialize the parser
 parser = Lark(grammar, start='start')
 
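The grammar feeding this Lark parser is only partially visible in the hunks above (the emphasized and comma rules fall outside the captured context). A rough usage sketch, assuming those omitted rules accept ordinary comma-separated tags:

    # Hypothetical smoke test of the parser defined above.
    tree = parser.parse("fox, outside, detailed background")
    print(tree.pretty())  # inspect the prompt/plain nodes that extract_tags walks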
+# ---------- Two HNSW indexes: artists and non-artist tags ----------
+_HNSW_ART = None
+_HNSW_TAG = None
+_HNSW_DIM = None
+_HNSW_N_ART = None
+_HNSW_N_TAG = None
+_HNSW_ART_PATH = pathlib.Path("tfidf_hnsw_artists.bin")
+_HNSW_TAG_PATH = pathlib.Path("tfidf_hnsw_tags.bin")
+
+def _l2_normalize_rows(mat: np.ndarray) -> np.ndarray:
+    mat = np.asarray(mat, dtype=np.float32)
+    norms = np.linalg.norm(mat, axis=1, keepdims=True)
+    norms[norms == 0.0] = 1.0
+    return mat / norms
+
+def _ensure_dual_hnsw_indexes():
+    """
+    Build/load two HNSW indexes over the SVD-reduced TF-IDF matrix:
+      • _HNSW_ART — rows whose tag (with optional 'by_' stripped) is in the artist_set
+      • _HNSW_TAG — only rows that are NOT artist tags
+    Index item IDs are the ORIGINAL row indices in reduced_matrix.
+    """
+    global _HNSW_ART, _HNSW_TAG, _HNSW_DIM, _HNSW_N_ART, _HNSW_N_TAG
+
+    if _HNSW_ART is not None and _HNSW_TAG is not None:
+        return
+
+    reduced_matrix = tf_idf_components['reduced_matrix']   # (N, D)
+    row_to_tag = tf_idf_components['row_to_tag']           # {row:int -> "tag_with_underscores"}
+    rm = _l2_normalize_rows(reduced_matrix).astype(np.float32)
+    n_items, dim = rm.shape
+
+    # Partition rows
+    artist_rows = []
+    tag_rows = []
+
+    for i in range(n_items):
+        tag = row_to_tag.get(i, "")
+
+        # Strip leading "by_" if present in the TF-IDF vocabulary, but don't rely on it.
+        base = tag[3:] if tag.startswith("by_") else tag
+
+        # Some corpora contain buckets you don't want shown as artists:
+        if tag in {"by_unknown_artist", "by_conditional_dnp"}:
+            tag_rows.append(i)
+            continue
+
+        if is_artist(base):
+            artist_rows.append(i)
+        else:
+            tag_rows.append(i)
+
+    logging.debug(f"HNSW partition: artists={len(artist_rows)} non_artists={len(tag_rows)}")
+
+    # Helper: build or load an index for a subset of rows
+    def _build_or_load(path: pathlib.Path, rows: list[int]) -> hnswlib.Index:
+        idx = hnswlib.Index(space='cosine', dim=dim)
+        need_build = True
+        if path.exists():
+            try:
+                idx.load_index(str(path), max_elements=max(1, len(rows)))
+                # Rebuild if the saved index count doesn’t match our rows
+                if getattr(idx, "get_current_count", None) and idx.get_current_count() == len(rows) and len(rows) > 0:
+                    need_build = False
+                else:
+                    logging.debug(f"Rebuilding {path.name}: saved_count!=rows_len ({idx.get_current_count()} vs {len(rows)})")
+            except Exception as e:
+                logging.debug(f"Reload {path.name} failed, rebuilding: {e}")
+
+        if need_build:
+            try:
+                if path.exists():
+                    path.unlink()
+            except Exception:
+                pass
+            idx.init_index(max_elements=max(1, len(rows)), ef_construction=200, M=16)
+            if rows:
+                idx.add_items(rm[rows], ids=np.asarray(rows, dtype=np.int32))
+            idx.save_index(str(path))
+
+        idx.set_ef(200)
+        return idx
+
+
+    _HNSW_ART = _build_or_load(_HNSW_ART_PATH, artist_rows)
+    _HNSW_TAG = _build_or_load(_HNSW_TAG_PATH, tag_rows)
+    _HNSW_DIM = dim
+    _HNSW_N_ART = len(artist_rows)
+    _HNSW_N_TAG = len(tag_rows)
+
+def _hnsw_query(idx: hnswlib.Index, vec: np.ndarray, k: int):
+    """
+    Query a given HNSW index with a (1, D) or (D,) vector in SVD space.
+    Returns (indices, sims) with cosine similarity scores.
+    """
+    _ensure_dual_hnsw_indexes()
+    q = np.asarray(vec, dtype=np.float32).reshape(-1)
+    q_norm = np.linalg.norm(q)
+    if q_norm > 0:
+        q = q / q_norm
+    labels, dists = idx.knn_query(q, k=k)
+    inds = labels[0]
+    sims = 1.0 - dists[0]  # cosine distance -> similarity
+    return inds, sims
+
+def _ann_tags_topk(vec: np.ndarray, k: int):
+    _ensure_dual_hnsw_indexes()
+    k = min(k, _HNSW_N_TAG if _HNSW_N_TAG else 0)
+    return _hnsw_query(_HNSW_TAG, vec, k) if k else (np.array([], dtype=int), np.array([], dtype=float))
+
+def _ann_artists_topk(vec: np.ndarray, k: int):
+    _ensure_dual_hnsw_indexes()
+    k = min(k, _HNSW_N_ART if _HNSW_N_ART else 0)
+    return _hnsw_query(_HNSW_ART, vec, k) if k else (np.array([], dtype=int), np.array([], dtype=float))
+# ------------------------------------------------------------------
+
+
+def _norm_tag_for_lookup(s: str) -> str:
+    # convert "name with spaces" -> "name_with_spaces" and unescape parens
+    return s.replace(' ', '_').replace('\\(', '(').replace('\\)', ')')
+
 # Function to extract tags
 def extract_tags(tree):
     tags_with_positions = []
@@ -160,7 +407,38 @@ def remove_special_tags(original_string):
     remaining_tags = [tag for tag in tags if tag not in special_tags]
     removed_tags = [tag for tag in tags if tag in special_tags]
     return ", ".join(remaining_tags), removed_tags
+
+#Model specific tags
+MODEL_SPECIFIC_TAGS = {
+    "masterpiece",
+    "best quality",
+    "good quality",
+    "normal quality",
+    "newest",
+    "absurdres",
+    "highres",
+    "safe",
+    "worst quality",
+    "early",
+    "low quality",
+    "lowres",
+    "explict content",
+    "very awa",
+    "worst aesthetic",
+    "score_9",
+    "score_8_up",
+    "score_7_up",
+    "score_6_up",
+    "score_5_up",
+    "score_4_up",
+    "source_pony",
+    "source_furry",
+    "source_cartoon",
+    "source_anime",
+    "rating_safe",
+    "rating_questionable",
+    "rating_explicit"
+}
 
 # Define a function to load all necessary components
 def load_model_components(file_path):
@@ -182,7 +460,15 @@ def load_model_components(file_path):
 
 # Load all components at the start
 tf_idf_components = load_model_components('tf_idf_files_420.joblib')
+idf = tf_idf_components['idf']
+if isinstance(idf, dict):
+    # idf is term -> idf_value; build a column-aligned vector
+    t2c = tf_idf_components['tag_to_column_index']
+    n_cols = max(t2c.values()) + 1
+    idf_by_col = np.ones(n_cols, dtype=np.float32)
+    for term, col in t2c.items():
+        idf_by_col[col] = float(idf.get(term, 1.0))
+    tf_idf_components['idf'] = idf_by_col
 
 nsfw_tags = set() # Initialize an empty set to store words meeting the threshold
 # Open and read the CSV file
@@ -218,18 +504,41 @@ def is_artist(name):
 sample_images_directory_path = 'sampleimages'
 def generate_artist_image_tuples(top_artists, image_directory):
     json_files = glob.glob(f'{image_directory}/*.json')
+    if not json_files:
+        return [], []  # no mapping present; return empty galleries safely
+    json_file_path = json_files[0]
     with open(json_file_path, 'r') as json_file:
         artist_to_file_map = json.load(json_file)
+    # DEBUG: mapping + baseline info
+    logging.debug("Gallery %s: loaded %d entries (map file=%s)",
+                  image_directory, len(artist_to_file_map), json_file_path)
+    _base = artist_to_file_map.get("")
+    logging.debug(
+        "Gallery %s: baseline '' -> %r (exists=%s)",
+        image_directory,
+        _base,
+        os.path.exists(os.path.join(image_directory, _base)) if _base else None,
+    )
+
+    baseline_tuple = []
     filename = artist_to_file_map.get("")
+    if filename:
+        image_path = os.path.join(image_directory, filename)
+        if os.path.exists(image_path):
+            baseline_tuple = [(image_path, "No Artist")]
+
     artist_image_tuples = []
     for artist in top_artists:
         filename = artist_to_file_map.get(artist)
+        # DEBUG: per-artist resolution
+        logging.debug(
+            "Gallery %s: %s -> %r (exists=%s)",
+            image_directory,
+            artist,
+            filename,
+            os.path.exists(os.path.join(image_directory, filename)) if filename else None,
+        )
+
         if filename:
             image_path = os.path.join(image_directory, filename)
             if os.path.exists(image_path):
@@ -321,7 +630,7 @@ def create_html_tables_for_tags(subtable_heading, item_heading, word_similarity_
     # Loop through the results and add table rows for each
     for word, sim in word_similarity_tuples:
         word_with_underscores = word.replace(' ', '_')
+        word_with_escaped_parentheses = escape_parens_for_display(word)
         count = tag2count.get(word_with_underscores.replace("\\(", "(").replace("\\)", ")"), 0) # Get the count if available, otherwise default to 0
         tag_id, wiki_entry = tag2idwiki.get(word_with_underscores, (None, ''))
         # Check if tag_id and wiki_entry are valid
@@ -329,7 +638,10 @@
             # Construct the URL for the tag's wiki page
             wiki_url = f"https://e621.net/wiki_pages/{tag_id}"
             # Make the tag a hyperlink with a tooltip
+            tag_element = (
+                f"<a class='hover-underline' href='{wiki_url}' target='_blank' "
+                f"title='{wiki_entry}'>{word_with_escaped_parentheses}</a>"
+            )
         else:
             # Display the word without any hyperlink or tooltip
             tag_element = word_with_escaped_parentheses
@@ -341,36 +653,34 @@
 
 
 def create_top_artists_table(top_artists):
     html_str = "<div class=\"scrollable-content\" style='display: inline-block; margin: 20px; text-align: center;'>"
+    html_str += "<h1>Top Artists</h1>"
     html_str += "<table style='font-size: 20px; border-collapse: collapse;'>"
     html_str += "<thead><tr><th>Artist</th><th>Similarity</th></tr></thead><tbody>"
+
     for artist, score in top_artists:
+        artist_disp = escape_html(escape_parens_for_display(artist))
+        similarity_percentage = "{:.1f}%".format(score * 100)
+        html_str += (
+            f"<tr><td style='padding: 3px 20px; border: none;'>{artist_disp}</td>"
+            f"<td style='padding: 3px 20px; border: none;'>{similarity_percentage}</td></tr>"
+        )
 
     html_str += "</tbody></table></div>"
     return html_str
 
 
+def construct_pseudo_vector(pseudo_doc_terms, idf, term_to_column_index):
+    cols, data = [], []
+    for term, w in pseudo_doc_terms.items():
+        j = term_to_column_index.get(term)
+        if j is None:
+            continue
+        cols.append(j)
+        data.append(w * idf[j])
+    n_cols = len(idf)
+    indptr = [0, len(cols)]
+    return csr_matrix((data, cols, indptr), shape=(1, n_cols), dtype=np.float32)
 
 
 def get_top_indices(reduced_pseudo_vector, reduced_matrix):
@@ -388,36 +698,32 @@ def get_tfidf_reduced_similar_tags(pseudo_doc_terms, allow_nsfw_tags):
     idf = tf_idf_components['idf']
     term_to_column_index = tf_idf_components['tag_to_column_index']
     row_to_tag = tf_idf_components['row_to_tag']
     svd = tf_idf_components['svd_model']
 
+    # 1) Build the pseudo TF-IDF, reduce to SVD space (unchanged)
     pseudo_tfidf_vector = construct_pseudo_vector(pseudo_doc_terms, idf, term_to_column_index)
+    reduced_pseudo_vector = svd.transform(pseudo_tfidf_vector)  # shape (1, D)
 
+    # 2) ANN: only fetch nearest non-artist candidates (no full-matrix cosine)
+    K = 2000  # tune for speed/recall
+    top_inds, top_sims = _ann_tags_topk(reduced_pseudo_vector, k=K)
 
+    # 3) Build similarity dict from those candidates
+    tag_similarity_dict = {}
+    for i, sim in zip(top_inds, top_sims):
+        tag = row_to_tag.get(int(i))
+        if tag is not None:
+            tag_similarity_dict[tag] = float(sim)
 
     if not allow_nsfw_tags:
+        tag_similarity_dict = {t: s for t, s in tag_similarity_dict.items() if t not in nsfw_tags}
 
+    # 4) Sort & escape like before
     sorted_tag_similarity_dict = OrderedDict(sorted(tag_similarity_dict.items(), key=lambda x: x[1], reverse=True))
     transformed_sorted_tag_similarity_dict = OrderedDict(
+        (key.replace('_', ' ').replace('(', '\\(').replace(')', '\\)'), val)
+        for key, val in sorted_tag_similarity_dict.items()
     )
     return transformed_sorted_tag_similarity_dict
 
 
@@ -463,22 +769,62 @@ def find_similar_tags(test_tags, tag_to_context_similarity, context_similarity_w
         end_pos = tag_info['end_pos']
         node_type = tag_info['node_type']
 
+        # Build the underscore form up-front
+        modified_tag_for_search = modified_tag.replace(' ', '_')
+
         if modified_tag in special_tags:
             bad_entities.append({"entity":"Special", "start":start_pos, "end":end_pos})
             continue
+
+        # Only accept exact underscore model-specific tokens (e.g., "score_9")
+        # special score/rating tags (kept as-is)
+        if modified_tag in special_tags:
+            bad_entities.append({"entity": "Special", "start": start_pos, "end": end_pos})
+            continue
+
+        # Model-specific tokens must match the user's input *exactly* (no pre-normalization).
+        # Use the original token as typed in the prompt, lowercased.
+        original_raw = tag_info["original_tag"].strip().lower()
+        if original_raw in MODEL_SPECIFIC_TAGS:
+            bad_entities.append({"entity": "Model Specific", "start": start_pos, "end": end_pos})
+            continue
+
+
         if modified_tag in encountered_modified_tags:
             bad_entities.append({"entity":"Duplicate", "start":start_pos, "end":end_pos})
             continue
         encountered_modified_tags.add(modified_tag)
+
+        norm_artist = (
+            modified_tag_for_search
+            .lower()
+            .removeprefix('by_')  # tolerate users typing "by something" or not
+        )
+        if is_artist(norm_artist):
+            by_key = f"by_{norm_artist}"
+            # try by_* first, then raw form as fallback
+            count = (find_similar_tags.tag2count.get(by_key) or
+                     find_similar_tags.tag2count.get(modified_tag_for_search, 0))
+            tag_id, wiki_entry = (
+                find_similar_tags.tag2idwiki.get(by_key) or
+                find_similar_tags.tag2idwiki.get(modified_tag_for_search, (None, ''))
+            )
+            wiki_url = f"https://e621.net/wiki_pages/{tag_id}" if tag_id is not None and wiki_entry else ""
+            known_entities_in_prompt.append({
+                "entity": "Known Tag",
+                "start": start_pos,
+                "end": end_pos,
+                "count": count,
+                "wiki_url": wiki_url,
+                "wiki_entry": wiki_entry
+            })
+            continue
         similar_words = find_similar_tags.fasttext_small_model.most_similar(modified_tag_for_search, topn = 100)
         result, seen = [], set(transformed_tags)
 
         if modified_tag_for_search in find_similar_tags.tag2aliases:
             if modified_tag in find_similar_tags.tag2aliases and "_" in modified_tag: #Implicitly tell the user that they should get rid of the underscore
+                result.append((modified_tag_for_search.replace('_',' '), 1))
                 seen.add(modified_tag)
             else: #The user correctly did not put underscores in their tag
                 count = find_similar_tags.tag2count.get(modified_tag_for_search, 0) # Get the count if available, otherwise default to 0
@@ -503,27 +849,47 @@
                     result.append((similar_tag.replace('_', ' '), round(similarity, 3)))
                     seen.add(similar_tag)
 
+        # Remove NSFW tags if appropriate.
         if not allow_nsfw_tags:
+            result = [(w, s) for (w, s) in result if w.replace(' ', '_') not in nsfw_tags]
+
+        # --- Context re-scoring (keys match how get_tfidf_reduced_similar_tags formats them) ---
+        def _ctx_score(name: str) -> float:
+            v = tag_to_context_similarity.get(name)
+            if v is None:
+                # TF-IDF dict escapes parentheses; candidates from FT do not.
+                v = tag_to_context_similarity.get(name.replace('(', '\\(').replace(')', '\\)'))
+            return float(v) if v is not None else 0.0
+
+        # If the slider is at 1.0, only keep candidates that exist in the TF-IDF context list.
+        if context_similarity_weight >= 0.999:
+            ctx_keys = set(tag_to_context_similarity.keys())
+            result = [
+                (w, s) for (w, s) in result
+                if (w in ctx_keys) or (w.replace('(', '\\(').replace(')', '\\)') in ctx_keys)
+            ]
+
+        # Linear blend: final = (1-λ)*fasttext + λ*context (no extra 0.5 scaling)
+        result = [
+            (w, (1.0 - context_similarity_weight) * s + context_similarity_weight * _ctx_score(w))
+            for (w, s) in result
+        ]
 
         result = sorted(result, key=lambda x: x[1], reverse=True)[:10]
+
         html_content += create_html_tables_for_tags(modified_tag, "Corrected Tag", result, find_similar_tags.tag2count, find_similar_tags.tag2idwiki)
 
         bad_entities.append({"entity":"Unknown Tag", "start":start_pos, "end":end_pos})
 
         tags_added=True
+    # If no tags were processed, add a message; otherwise close the wrapper div
     if not tags_added:
        html_content = create_html_placeholder(title="Unknown Tags", content="No Unknown Tags Found")
+    else:
+        html_content += "</div>"
+
+    return html_content, bad_entities, known_entities_in_prompt
 
 
 
 def build_tag_offsets_dicts(new_image_tags_with_positions):
@@ -581,57 +947,91 @@ def augment_bad_entities_with_regex(text):
 def escape_html(text):
     return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;").replace("'", "&#39;")
 
+def escape_parens_for_display(s: str) -> str:
+    # ensure single backslash before any literal parens in display text
+    return (
+        s.replace("\\(", "(")
+         .replace("\\)", ")")
+         .replace("(", "\\(")
+         .replace(")", "\\)")
+    )
+
 def format_annotated_html(bad_entities, known_entities, text):
     tooltip_map = {
         "Unknown Tag": "This may not be a valid e621 tag. Consider removing or replacing it with tag(s) from the \"Unknown Tags\" section.",
         "Duplicate": "This tag has appeared multiple times in your prompt. Consider removing the copies.",
         "Remove Final Comma": "There should be no comma at the end of your prompt. Consider removing it.",
         "Move Comma Inside Parentheses": "In most e621-based models, the comma following a tag functions as an \"attention anchor\", carrying most of the tag's information. It should therefore be assigned the same weight as the rest of the tag. So instead of \"(lineless:1.1),\", consider \"(lineless,:1.1)\" or \"(lineless,)\"",
+        "Double Comma": "One comma between tags is considered ample.",
+        "Model Specific": "This is not an e621 tag, but may still be valid with the right model. Check your model's documentation. If the tag is not mentioned in the documentation, do not use it."
     }
     color_map = {
+        "Unknown Tag": ("white", "red"),
+        "Duplicate": ("black", "yellow"),
+        "Move Comma Inside Parentheses": ("white", "green"),
+        "Double Comma": ("white", "orange"),
+        "Model Specific": ("black", "lightgray"),
+        "Remove Final Comma": ("white", "brown")
     }
+
+    # Splice from the original text so indexes stay valid.
+    combined = sorted(bad_entities + known_entities, key=lambda x: x["start"], reverse=True)
     html_text = text
+
+    for entity in combined:
+        start = entity["start"]
+        end = entity["end"]
+        label = entity["entity"]
+
+        # Escape only the replaced segment (keeps indices correct).
+        segment = text[start:end]
+        disp = escape_html(escape_parens_for_display(segment))
+
         if label == "Known Tag":
+            wiki_url = entity.get("wiki_url", "")
+            count = entity.get("count", 0)
+            wiki_entry = entity.get("wiki_entry", "")
+            sanitized_wiki_entry = escape_html(wiki_entry) if wiki_entry else "Unavailable"
+
+            if wiki_url:
+                html_part = (
+                    f'<a class="hover-underline" href="{wiki_url}" target="_blank" '
+                    f'title="Count: {count}\tWiki: {sanitized_wiki_entry}" '
+                    f'style="cursor: pointer; font-style: italic;">{disp}</a>'
+                )
             else:
+                html_part = (
+                    f'<span class="hover-underline" title="Count: {count}\tWiki: {sanitized_wiki_entry}" '
+                    f'style="cursor: help; font-style: italic;">{disp}</span>'
+                )
         else:
+            fg, bg = color_map.get(label, ("black", "white"))
+            html_part = f'<span style="background-color: {bg}; color: {fg};">{disp}</span>'
+
         html_text = html_text[:start] + html_part + html_text[end:]
+
+    # Color key (only for labels that actually appeared)
     color_key_html = "<div style='text-align: right; margin-top: 20px;'>Key:"
+    used_labels = {e["entity"] for e in bad_entities}
+    for label, (fg, bg) in color_map.items():
         if label in used_labels:
             tooltip = tooltip_map.get(label, "")
+            color_key_html += (
+                f" <span class='hover-underline' style='background-color: {bg}; color: {fg}; margin-right: 10px;' "
+                f"title='{tooltip}'>{label}</span>"
+            )
     color_key_html += "</div>"
+
+    # Wrap the whole annotated area so we can place the hint inside it
+    annotated_box = (
+        "<div class='annotated-wrap' style='padding:10px;font-size:16px;'>"
+        f"{html_text}"
+        f"{TOOLTIP_NOTE_HTML}"
+        "</div>"
+    )
+
+    return annotated_box + color_key_html
 
 
 
 def find_similar_artists(original_tags_string, top_n, context_similarity_weight, allow_nsfw_tags):
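A quick round-trip sketch of the escape_parens_for_display helper added at the top of this hunk (hypothetical inputs): both raw and already-escaped parentheses end up with exactly one backslash, which is the form the display code expects.

    # Hypothetical check of escape_parens_for_display behavior.
    assert escape_parens_for_display("wolf (feral)") == "wolf \\(feral\\)"
    assert escape_parens_for_display("wolf \\(feral\\)") == "wolf \\(feral\\)"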
@@ -648,8 +1048,9 @@ def find_similar_artists(original_tags_string, top_n, context_similarity_weight,
        #Suggested tags stuff
        suggested_tags_html_content = "<div class=\"scrollable-content\" style='display: inline-block; margin: 20px; text-align: center;'>"
        suggested_tags_html_content += "<h1>Suggested Tags</h1>" # Heading for the table
+        terms = [item["tf_idf_matrix_tag"] for item in tag_data] + removed_tags
+        suggested_tags = get_tfidf_reduced_similar_tags(dict(Counter(terms)), allow_nsfw_tags)
+
        unseen_tags_data, bad_entities, known_entities = find_similar_tags(tag_data, suggested_tags, context_similarity_weight, allow_nsfw_tags)
 
        #Bad tags stuff
@@ -660,40 +1061,130 @@
 
        # Create a set of tags that should be filtered out
        filter_tags = {entry["original_tag"].strip() for entry in tag_data}
+        filter_tags_norm = { _norm_tag_for_lookup(t.lower().removeprefix('by ').removeprefix('by_')) for t in filter_tags }
+        suggested_tags_filtered = OrderedDict(
+            (k, v) for k, v in suggested_tags.items()
+            if k not in filter_tags and _norm_tag_for_lookup(k.lower()) not in filter_tags_norm
+        )
 
        # Splitting the dictionary into two based on the condition
+        def _norm_no_by(s: str) -> str:
+            n = _norm_tag_for_lookup(s)
+            return n[3:] if n.startswith("by_") else n
+
+        suggested_artist_tags_filtered = OrderedDict(
+            (k, v) for k, v in suggested_tags_filtered.items()
+            if is_artist(_norm_no_by(k))
+        )
+
+        suggested_non_artist_tags_filtered = OrderedDict(
+            (k, v) for k, v in suggested_tags_filtered.items()
+            if not is_artist(_norm_no_by(k)) and k not in special_tags
+        )
+
 
        topnsuggestions = list(islice(suggested_non_artist_tags_filtered.items(), 100))
        suggested_tags_html_content += create_html_tables_for_tags("-", "Suggested Tag", topnsuggestions, find_similar_tags.tag2count, find_similar_tags.tag2idwiki)
+        suggested_tags_html_content += "</div>"
+
+
+        # --- Artist stuff: query artist-only index directly ---
+        idf_vec = tf_idf_components['idf']
+        t2c = tf_idf_components['tag_to_column_index']
+        svd = tf_idf_components['svd_model']
+        pseudo_terms = dict(Counter(terms))
+        pseudo_vec = construct_pseudo_vector(pseudo_terms, idf_vec, t2c)
+        reduced_q = svd.transform(pseudo_vec)
+
+        K_art = max(100, top_n * 10)  # widen search to stabilize ranks
+        art_inds, art_sims = _ann_artists_topk(reduced_q, k=K_art)
+
+        row_to_tag = tf_idf_components['row_to_tag']
+        bad_labels = {"by_unknown_artist", "by_conditional_dnp", "unknown_artist", "conditional_dnp"}
+
+        top_artists_raw = []
+        for idx_i, sim in zip(art_inds, art_sims):
+            tag = row_to_tag.get(int(idx_i), "")
+            if not tag:
+                continue
+
+            # Normalize spaces to underscores for reliable checks
+            norm = tag.replace(" ", "_")
+
+            # Drop known non-artist placeholders
+            if norm in bad_labels:
+                continue
 
+            # Accept either "by_foo" or plain "foo"
+            base = norm[3:] if norm.startswith("by_") else norm
+
+            # Guard: only keep if this *really* is an artist we know
+            if not is_artist(base):
+                continue
+
+            name_disp = base.replace("_", " ")
+            top_artists_raw.append((name_disp, float(sim)))
+
+        if not top_artists_raw:
+            logging.debug("No artist hits. First few neighbor labels: %s",
+                          [row_to_tag.get(int(i), "") for i in art_inds[:10]])
+
+        # take the best unique names, in order
+        seen = set()
+        deduped = []
+        for n, s in top_artists_raw:
+            if n not in seen:
+                deduped.append((n, s))
+                seen.add(n)
+            if len(deduped) >= top_n:
+                break
+
+        top_artists = deduped
+        logging.debug("Top artists (n=%d): %s", len(top_artists), top_artists)
        top_artists_str = create_top_artists_table(top_artists)
        dynamic_prompts_formatted_artists = "{" + "|".join([artist for artist, _ in top_artists]) + "}"
+        dynamic_prompts_formatted_artists = "{" + "|".join(
+            [escape_parens_for_display(artist) for artist, _ in top_artists]
+        ) + "}"
 
        image_galleries = []
        for root, dirs, files in os.walk(sample_images_directory_path):
            for name in dirs:
+                baseline, artists = generate_artist_image_tuples([name for name, _ in top_artists], os.path.join(root, name))
+                dir_path = os.path.join(root, name)
+                baseline, artists = generate_artist_image_tuples([n for n, _ in top_artists], dir_path)
+                logging.debug("Gallery built for %s -> baseline=%d, artists_found=%d", dir_path, len(baseline), len(artists))
                image_galleries.append(baseline)  # Add baseline as its own gallery item
                image_galleries.append(artists)  # Extend the list with artist tuples
 
        return (unseen_tags_data, bad_tags_illustrated_html, suggested_tags_html_content, top_artists_str, dynamic_prompts_formatted_artists, *image_galleries)
+    except ParseError:
+        # Build empty galleries so the tuple length matches the declared outputs
+        empty_galleries = []
+        for _root, _dirs, _files in os.walk(sample_images_directory_path):
+            for _ in _dirs:
+                empty_galleries.extend([[], []])  # one empty list per Gallery component
+
+        return (
+            create_html_placeholder(title="Unknown Tags", content="Parse Error"),
+            "Parse Error: Check for mismatched parentheses or something",
+            create_html_placeholder(title="Suggested Tags"),
+            "",  # top_artists
+            "",  # dynamic_prompts
+            *empty_galleries,
+        )
+
 
 
 with gr.Blocks(css=css) as app:
     with gr.Group():
         with gr.Row():
+            with gr.Column(scale=3, elem_classes=["prompt-col"]):
+                image_tags = gr.Textbox(
+                    label="Enter Prompt",
+                    placeholder="e.g. fox, outside, detailed background, ...",
+                    lines=1  # Enter submits (see .submit() below)
+                )
                bad_tags_illustrated_string = gr.HTML()
            with gr.Column(scale=1):
                gr.HTML(
@@ -708,23 +1199,92 @@ with gr.Blocks(css=css) as app:
    with gr.Group():
        with gr.Row():
            context_similarity_weight = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Context Similarity Weight")
+            allow_nsfw = gr.Checkbox(label="Allow NSFW Tag Suggestions", value=False)
        with gr.Row():
            with gr.Column(scale=2):
+                unseen_tags = gr.HTML(
+                    label="Unknown Tags",
+                    value=create_html_placeholder(title="Unknown Tags"),
+                    elem_id="unseen_html",
+                    elem_classes=["scroll-fade"],
+                )
            with gr.Column(scale=1):
+                suggested_tags = gr.HTML(
+                    label="Suggested Tags",
+                    value=create_html_placeholder(title="Suggested Tags"),
+                    elem_id="suggested_html",
+                    elem_classes=["scroll-fade"],
+                )
            with gr.Column(scale=1):
                with gr.Group():
                    num_artists = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of artists")
+                    top_artists = gr.HTML(
+                        label="Top Artists",
+                        value=create_html_placeholder(title="Top Artists"),
+                        elem_id="artists_html",
+                        elem_classes=["scroll-fade"],
+                    )
+    gr.HTML("""
+    <script>
+    (function(){
+      function wire(id){
+        const host = document.getElementById(id);
+        if (!host) return;
+
+        // Always use the *inner* .scrollable-content as the scroller
+        const getScroller = () => host.querySelector('.scrollable-content') || host;
+        let scroller = getScroller();
+
+        // Set CSS var so the fade blends with host background
+        const bg = getComputedStyle(host).backgroundColor;
+        host.style.setProperty('--host-bg', bg);
+
+        const refresh = () => {
+          // guard for fractional pixels across browsers
+          const atBottom = Math.ceil(scroller.scrollTop + scroller.clientHeight) >= scroller.scrollHeight;
+          host.classList.toggle('at-bottom', atBottom);
+        };
+
+        // (Re)attach scroll listener to the current scroller
+        const attach = (el) => {
+          if (!el) return;
+          el.addEventListener('scroll', refresh, {passive:true});
+          // initial state
+          refresh();
+        };
+
+        attach(scroller);
+
+        // If Gradio replaces inner HTML, re-wire to new scroller
+        new MutationObserver(() => {
+          const next = getScroller();
+          if (next && next !== scroller) {
+            scroller.removeEventListener && scroller.removeEventListener('scroll', refresh);
+            scroller = next;
+            attach(scroller);
+          }
+          // background might change with themes; keep it fresh
+          const newBg = getComputedStyle(host).backgroundColor;
+          host.style.setProperty('--host-bg', newBg);
+          refresh();
+        }).observe(host, {childList: true, subtree: true});
+
+        // Also respond to resizes
+        new ResizeObserver(refresh).observe(host);
+      }
+
+      ['unseen_html','suggested_html','artists_html'].forEach(wire);
+    })();
+    </script>
+    """, visible=False)
+
    dynamic_prompts = gr.Textbox(label="Dynamic Prompts Format", info="For if you're using the Automatic1111 webui (https://github.com/AUTOMATIC1111/stable-diffusion-webui) with the Dynamic Prompts extension activated (https://github.com/adieyal/sd-dynamic-prompts) and want to try them all individually.")
    galleries = []
    for root, dirs, files in os.walk(sample_images_directory_path):
        for name in dirs:
            with gr.Row():
                baseline = gr.Gallery(allow_preview=False, rows=1, columns=1, height=420, scale=3)
+                styles = gr.Gallery(allow_preview=False, rows=2, columns=5, height=420, scale=8)
                galleries.extend([baseline, styles])
 
    submit_button.click(
@@ -732,6 +1292,13 @@
        inputs=[image_tags, num_artists, context_similarity_weight, allow_nsfw],
        outputs=[unseen_tags, bad_tags_illustrated_string, suggested_tags, top_artists, dynamic_prompts] + galleries
    )
+    # Also run when pressing Enter in the prompt box
+    image_tags.submit(
+        find_similar_artists,
+        inputs=[image_tags, num_artists, context_similarity_weight, allow_nsfw],
+        outputs=[unseen_tags, bad_tags_illustrated_string, suggested_tags, top_artists, dynamic_prompts] + galleries
+    )
+
 
    gr.Markdown(faq_content)
 
requirements.txt (CHANGED)

@@ -2,7 +2,7 @@ gradio==4.44.1
 gradio-client==1.3.0
 fastapi==0.116.1
 starlette==0.47.3
-
+hnswlib==0.8.0
 numpy==1.25.1
 scikit-learn==1.4.1.post1
 h5py==3.8.0
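The new hnswlib dependency backs the two approximate-nearest-neighbour indexes app.py now builds over the SVD-reduced TF-IDF rows. A minimal, self-contained sketch of the pattern with made-up data (the real code wraps this in _ensure_dual_hnsw_indexes and _hnsw_query):

    import numpy as np
    import hnswlib

    data = np.random.rand(1000, 64).astype(np.float32)     # stand-in for the reduced matrix
    index = hnswlib.Index(space='cosine', dim=64)
    index.init_index(max_elements=1000, ef_construction=200, M=16)
    index.add_items(data, ids=np.arange(1000, dtype=np.int32))
    index.set_ef(200)

    labels, dists = index.knn_query(data[:1], k=5)          # cosine distance; similarity = 1 - dist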