Spaces:

Rothfeld
/

image-caption-trimmer

Sleeping

App Files Files Community

Your Name committed on Mar 28

Commit

37e5bdb

1 Parent(s): a70668e

asd

Browse files

Files changed (8) hide show

.gitattributes +0 -35
.gitignore +2 -0
README.md +33 -8
_orig.py +109 -0
app.py +392 -0
app_v1.py +382 -0
prompt.txt +3 -0
requirements.txt +6 -0

.gitattributes DELETED Viewed

@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .devcontainer
2	+ .vscode

README.md CHANGED Viewed

@@ -1,14 +1,39 @@
 ---
-title: Image Caption Trimmer
-emoji: 🚀
-colorFrom: purple
-colorTo: red
 sdk: gradio
-sdk_version: 6.10.0
 app_file: app.py
 pinned: false
-license: apache-2.0
-short_description: shorten a text by dropping unimportant words
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Word Importance Evaluator
+emoji: 🔬
+colorFrom: yellow
+colorTo: teal
 sdk: gradio
+sdk_version: "4.44.0"
 app_file: app.py
 pinned: false
+license: mit
 ---
+# Word Importance Evaluator
+Drop-one embedding analysis using `sentence-transformers/static-retrieval-mrl-en-v1`.
+Each word's importance score = the semantic distance introduced by omitting that word
+from the prompt (higher = more critical to the meaning).
+## Features
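+- **Importance bar chart** — horizontal bars coloured from cold (low importance) to hot (high importance), with a dashed threshold line drawn at the value chosen on the slider
+- **Sampled prompts** (`app.py`) — random variants of the prompt in which each word is kept with probability equal to its importance score; the earlier `app_v1.py` instead plots a violin-style spread per word, simulated by adding Gaussian jitter around its score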
+- **Threshold filter** — highlighted HTML output and summary of words above the cutoff
+- **Multi-line prompt support** — all lines are concatenated into a single word list
+## Usage
+1. Paste a prompt (e.g. a Stable Diffusion caption)
+2. Adjust the importance threshold (default 0.30)
+3. Adjust the number of sampled prompts if desired
+4. Click **Analyse →**
+## Files
+| File | Purpose |
+|---|---|
+| `app.py` | Full Gradio Space — core evaluator code is unchanged |
+| `requirements.txt` | Python dependencies |

_orig.py ADDED Viewed

	@@ -0,0 +1,109 @@

+# %%
+from pathlib import Path
+import torch
+from sentence_transformers import SentenceTransformer
+# %%
+def create_splits(p):
+    # Create prompts with each word omitted
+    words = p.split()
+    omit_prompts = [
+        " ".join(w for i, w in enumerate(words) if i != j) for j in range(len(words))
+    ]
+    return words, omit_prompts
+# %%
+from abc import ABC, abstractmethod
+class IE(ABC):
+    @abstractmethod
+    def get_word_importance_chunked(self, PROMPT):
+        pass
+class ImportanceEvaluatorStatic(IE):
+    def __init__(self):
+        # Download from the  Hub
+        self.CLIP_MODEL_ID = "sentence-transformers/static-retrieval-mrl-en-v1"
+        self.model = SentenceTransformer(self.CLIP_MODEL_ID)
+    def get_word_importance(self, PROMPT):
+        words, omit_prompts = create_splits(PROMPT)
+        sentences = [PROMPT] + omit_prompts
+        embeddings = self.model.encode(sentences)
+        similarities = self.model.similarity(embeddings[0:1], embeddings)
+        x = similarities[0]
+        x = -x.log()  # importance of a word is the inverse of similarity-when-dropped
+        x = x - x[0]  # subtract self-similarity as the baseline
+        x = x.clamp(0)
+        x /= x.max()
+        return x[1:], words
+    def get_word_importance_chunked(self, PROMPT):
+        return self.get_word_importance(PROMPT)
+# %%
+def compute_static_word_importances(
+    f: Path, ie: ImportanceEvaluatorStatic, overwrite=False
+):
+    model_id = ie.CLIP_MODEL_ID
+    for c in f.glob(".captions/*"):
+        metadir = c / ".meta"
+        for file in c.iterdir():
+            if file.suffix == ".txt" and file.is_file():
+                # print(file)
+                try:
+                    out = metadir / file.with_suffix(".pth").name
+                    r = {}
+                    if out.exists():
+                        r = torch.load(out, weights_only=False)
+                        assert isinstance(r, dict), "corrupt format"
+                        if (not overwrite) and (model_id in r):
+                            continue
+                    caption = file.read_text()
+                    if (model_id not in r) or overwrite:
+                        importances = [
+                            ie.get_word_importance_chunked(l) if l else None
+                            for l in caption.split("\n")
+                        ]
+                        r[model_id] = importances
+                    metadir.mkdir(exist_ok=True)
+                    torch.save(r, out)
+                except Exception as e:
+                    print("ERROR", out, e)
+def yield_dirs(root: Path):
+    for subset in root.iterdir():
+        if not subset.is_dir():
+            if subset.name.startswith("."):
+                continue
+            yield subset
+if __name__ == "__main__":
+    ies = ImportanceEvaluatorStatic()
+    root = Path("/path_to_my_files")
+    dfs = []
+    from tqdm import tqdm
+    pb = tqdm()
+    for f in yield_dirs(root, True):
+        pb.update(1)
+        print(f)
+        compute_static_word_importances(f, ies, overwrite=False)

app.py ADDED Viewed

	@@ -0,0 +1,392 @@

+import gradio as gr
+import torch
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib.colors import LinearSegmentedColormap
+from sentence_transformers import SentenceTransformer
+from abc import ABC, abstractmethod
+import io
+from PIL import Image
+# ─────────────────────────────────────────────
+#  Core importance evaluator (unchanged logic)
+# ─────────────────────────────────────────────
+def create_splits(p):
+    words = p.split()
+    omit_prompts = [
+        " ".join(w for i, w in enumerate(words) if i != j) for j in range(len(words))
+    ]
+    return words, omit_prompts
+class IE(ABC):
+    @abstractmethod
+    def get_word_importance_chunked(self, PROMPT):
+        pass
+class ImportanceEvaluatorStatic(IE):
+    def __init__(self):
+        self.CLIP_MODEL_ID = "sentence-transformers/static-retrieval-mrl-en-v1"
+        self.model = SentenceTransformer(self.CLIP_MODEL_ID)
+    def get_word_importance(self, PROMPT):
+        words, omit_prompts = create_splits(PROMPT)
+        sentences = [PROMPT] + omit_prompts
+        embeddings = self.model.encode(sentences)
+        similarities = self.model.similarity(embeddings[0:1], embeddings)
+        x = similarities[0]
+        x = -x.log()
+        x = x - x[0]
+        x = x.clamp(0)
+        if x.max() > 0:
+            x /= x.max()
+        return x[1:], words
+    def get_word_importance_chunked(self, PROMPT):
+        return self.get_word_importance(PROMPT)
+    def get_caption_embedding(self, PROMPT):
+        return self.model.encode(PROMPT)
+# ─────────────────────────────────────────────
+#  Load model once at startup
+# ─────────────────────────────────────────────
+_ie = None
+def get_evaluator():
+    global _ie
+    if _ie is None:
+        _ie = ImportanceEvaluatorStatic()
+    return _ie
+# ─────────────────────────────────────────────
+#  Plotting helpers
+# ─────────────────────────────────────────────
+# Hex colours shared by the matplotlib figures, the generated HTML and the CSS.
+PALETTE = {
+    "bg":        "#0d0f14",
+    "panel":     "#14171f",
+    "border":    "#1e2330",
+    "accent":    "#e8c547",
+    "accent2":   "#5bc4c0",
+    "text":      "#d4d8e8",
+    "muted":     "#5a6080",
+    "low":       "#2a3a5c",
+    "mid":       "#4a7c8c",
+    "high":      "#e8c547",
+    "critical":  "#e85f47",
+}
+# Four-stop colormap used to colour a value by its importance score. The stops
+# are the same hex codes as PALETTE's "low", "accent2", "high" and "critical".
+CMAP = LinearSegmentedColormap.from_list(
+    "imp", ["#2a3a5c", "#5bc4c0", "#e8c547", "#e85f47"], N=256
+)
+def _fig_to_pil(fig):
+    buf = io.BytesIO()
+    fig.savefig(buf, format="png", dpi=150, bbox_inches="tight",
+                facecolor=PALETTE["bg"])
+    buf.seek(0)
+    img = Image.open(buf).copy()
+    buf.close()
+    plt.close(fig)
+    return img
+def plot_importance_bars(words, importances, threshold=0.3):
+    """Horizontal bar chart coloured by importance with threshold line.
+
+    Args:
+        words: Labels for the bars, one per row.
+        importances: One numeric score per word; used as the bar length.
+        threshold: Position of the dashed vertical line; scores at or above
+            it get a marker and the accent colour.
+
+    Returns:
+        The rendered chart as a PIL image (via ``_fig_to_pil``).
+    """
+    n = len(words)
+    # Grow the figure with the number of words, but never below 3.5 inches.
+    fig_h = max(3.5, n * 0.38)
+    fig, ax = plt.subplots(figsize=(9, fig_h), facecolor=PALETTE["bg"])
+    ax.set_facecolor(PALETTE["panel"])
+    vals = np.array(importances)
+    colors = [CMAP(float(v)) for v in vals]
+    bars = ax.barh(range(n), vals, color=colors, edgecolor=PALETTE["border"],
+                   linewidth=0.6, height=0.65)
+    # threshold line
+    ax.axvline(threshold, color=PALETTE["accent"], linewidth=1.4,
+               linestyle="--", alpha=0.85, label=f"threshold = {threshold:.2f}")
+    # word labels
+    ax.set_yticks(range(n))
+    ax.set_yticklabels(words, fontsize=10, color=PALETTE["text"],
+                       fontfamily="monospace")
+    # Flip the axis so the first word is drawn at the top.
+    ax.invert_yaxis()
+    # value annotations
+    for i, (bar, v) in enumerate(zip(bars, vals)):
+        marker = "▶" if v >= threshold else ""
+        # The x position is capped at 1.05 so the label stays inside the
+        # 0..1.18 x-range set below.
+        ax.text(min(v + 0.02, 1.05), i, f"{v:.3f} {marker}",
+                va="center", fontsize=8.5,
+                color=PALETTE["accent"] if v >= threshold else PALETTE["muted"])
+    ax.set_xlim(0, 1.18)
+    ax.set_xlabel("Normalised importance", color=PALETTE["text"], fontsize=10)
+    ax.set_title("Word Importance  ·  drop-one analysis", color=PALETTE["text"],
+                 fontsize=12, fontweight="bold", pad=10)
+    ax.tick_params(colors=PALETTE["muted"], which="both")
+    for spine in ax.spines.values():
+        spine.set_edgecolor(PALETTE["border"])
+    ax.legend(facecolor=PALETTE["panel"], edgecolor=PALETTE["border"],
+              labelcolor=PALETTE["accent"], fontsize=9)
+    fig.tight_layout(pad=1.2)
+    return _fig_to_pil(fig)
+def sample_prompts(words, importances, n_samples=8, seed=42):
+    """
+    Each word is included in a sample with probability == its importance score.
+    Returns HTML showing N sampled prompts, with included words highlighted
+    by their importance colour and dropped words shown as dim strikethrough.
+    """
+    rng = np.random.default_rng(seed)
+    vals = np.array(importances, dtype=float)
+    def imp_to_hex(v):
+        r, g, b, _ = CMAP(float(v))
+        return "#{:02x}{:02x}{:02x}".format(int(r*255), int(g*255), int(b*255))
+    rows_html = []
+    for s in range(n_samples):
+        mask = rng.random(len(words)) < vals          # Bernoulli draw
+        word_spans = []
+        for word, keep, v in zip(words, mask, vals):
+            color = imp_to_hex(v)
+            if keep:
+                span = (
+                    f'<span style="color:{color};font-weight:600;'
+                    f'font-family:monospace;padding:0 1px;">{word}</span>'
+                )
+            else:
+                span = (
+                    f'<span style="color:{PALETTE["border"]};'
+                    f'text-decoration:line-through;font-family:monospace;'
+                    f'padding:0 1px;">{word}</span>'
+                )
+            word_spans.append(span)
+        kept_count = int(mask.sum())
+        row = (
+            f'<div style="margin-bottom:10px;padding:8px 12px;'
+            f'background:{PALETTE["bg"]};border-left:3px solid {PALETTE["border"]};'
+            f'border-radius:0 6px 6px 0;">'
+            f'<span style="color:{PALETTE["muted"]};font-size:11px;'
+            f'font-family:monospace;margin-right:10px;">#{s+1} '
+            f'({kept_count}/{len(words)})</span>'
+            + " ".join(word_spans)
+            + "</div>"
+        )
+        rows_html.append(row)
+    # legend
+    legend_stops = [0.0, 0.33, 0.66, 1.0]
+    legend_html = "".join(
+        f'<span style="color:{imp_to_hex(v)};font-family:monospace;'
+        f'font-size:11px;margin-right:8px;">▮ {v:.0%}</span>'
+        for v in legend_stops
+    )
+    html = (
+        f'<div style="background:{PALETTE["panel"]};padding:16px 20px;'
+        f'border-radius:8px;border:1px solid {PALETTE["border"]};">'
+        f'<div style="margin-bottom:12px;color:{PALETTE["muted"]};font-size:12px;'
+        f'font-family:monospace;">importance colour scale: {legend_html}</div>'
+        + "".join(rows_html)
+        + "</div>"
+    )
+    return html
+def build_threshold_output(words, importances, threshold):
+    """Return highlighted HTML and plain text for above-threshold words."""
+    lines = []
+    above = []
+    for word, imp in zip(words, importances):
+        if imp >= threshold:
+            above.append(word)
+            style = (f"background:{PALETTE['accent']}22;"
+                     f"color:{PALETTE['accent']};"
+                     "border-radius:3px;padding:1px 4px;"
+                     "font-weight:700;font-family:monospace;")
+        else:
+            style = f"color:{PALETTE['muted']};font-family:monospace;"
+        lines.append(f'<span style="{style}">{word}</span>')
+    highlighted = (
+        f'<div style="background:{PALETTE["panel"]};padding:16px 20px;'
+        f'border-radius:8px;border:1px solid {PALETTE["border"]};'
+        f'line-height:2.1;font-size:15px;">'
+        + " ".join(lines)
+        + "</div>"
+    )
+    summary = (
+        f"**{len(above)} / {len(words)} words** above threshold {threshold:.2f}:\n\n"
+        + ", ".join(f"`{w}`" for w in above) if above else
+        "_No words exceed the threshold._"
+    )
+    return highlighted, summary
+# ─────────────────────────────────────────────
+#  Main inference function
+# ─────────────────────────────────────────────
+def analyse(prompt: str, threshold: float, n_samples: int):
+    prompt = prompt.strip()
+    if not prompt:
+        return None, "<p>Please enter a prompt.</p>", "", "<p></p>"
+    ie = get_evaluator()
+    lines = [l for l in prompt.split("\n") if l.strip()]
+    all_words, all_imps = [], []
+    for line in lines:
+        result = ie.get_word_importance_chunked(line)
+        if result is not None:
+            imps, words = result
+            all_words.extend(words)
+            all_imps.extend(imps.tolist())
+    if not all_words:
+        return None, "<p>Could not parse prompt.</p>", "", "<p></p>"
+    bar_img                  = plot_importance_bars(all_words, all_imps, threshold)
+    highlighted, summary     = build_threshold_output(all_words, all_imps, threshold)
+    samples_html             = sample_prompts(all_words, all_imps, n_samples=n_samples)
+    return bar_img, highlighted, summary, samples_html
+# ─────────────────────────────────────────────
+#  Gradio UI
+# ─────────────────────────────────────────────
+# Stylesheet passed to gr.Blocks. It is an f-string so PALETTE colours can be
+# interpolated; the doubled braces are literal CSS braces.
+CSS = f"""
+@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:wght@300;400;600&display=swap');
+body, .gradio-container {{
+    background: {PALETTE['bg']} !important;
+    font-family: 'DM Sans', sans-serif !important;
+    color: {PALETTE['text']} !important;
+}}
+.gr-panel, .gr-box, .gr-form {{
+    background: {PALETTE['panel']} !important;
+    border: 1px solid {PALETTE['border']} !important;
+    border-radius: 10px !important;
+}}
+h1, h2, h3 {{
+    font-family: 'Space Mono', monospace !important;
+    color: {PALETTE['accent']} !important;
+    letter-spacing: -0.5px !important;
+}}
+.gr-button-primary {{
+    background: {PALETTE['accent']} !important;
+    color: {PALETTE['bg']} !important;
+    font-family: 'Space Mono', monospace !important;
+    font-weight: 700 !important;
+    border: none !important;
+    border-radius: 6px !important;
+}}
+.gr-button-primary:hover {{
+    opacity: 0.85 !important;
+}}
+label {{
+    color: {PALETTE['text']} !important;
+    font-size: 13px !important;
+    font-family: 'Space Mono', monospace !important;
+}}
+textarea, input[type=text] {{
+    background: {PALETTE['bg']} !important;
+    color: {PALETTE['text']} !important;
+    border: 1px solid {PALETTE['border']} !important;
+    font-family: 'Space Mono', monospace !important;
+    font-size: 13px !important;
+}}
+.markdown-text {{
+    color: {PALETTE['text']} !important;
+}}
+"""
+# Markdown shown at the top of the page.
+DESCRIPTION = """
+# 🔬 Word Importance Evaluator
+Drop-one embedding analysis using **static-retrieval-mrl-en-v1**.
+Each word's importance = semantic distance introduced by omitting it.
+- **Bar chart** — ranked importance with threshold line
+- **Threshold filter** — words above cutoff highlighted
+- **Sampled prompts** — each word included with probability = its importance score
+"""
+# Page layout: inputs in the wider left column, threshold output on the right,
+# then the bar chart and the sampled prompts underneath.
+with gr.Blocks(css=CSS, title="Word Importance Evaluator") as demo:
+    gr.Markdown(DESCRIPTION)
+    with gr.Row():
+        with gr.Column(scale=2):
+            prompt_box = gr.Textbox(
+                label="Prompt",
+                placeholder="a majestic lion in golden hour light, photorealistic, dramatic shadows",
+                lines=4,
+            )
+            with gr.Row():
+                threshold_slider = gr.Slider(
+                    minimum=0.0, maximum=1.0, value=0.3, step=0.01,
+                    label="Importance threshold",
+                )
+                n_samples_slider = gr.Slider(
+                    minimum=1, maximum=20, value=8, step=1,
+                    label="Number of sampled prompts",
+                )
+            run_btn = gr.Button("Analyse →", variant="primary")
+        with gr.Column(scale=1):
+            threshold_html = gr.HTML(label="Threshold output")
+            threshold_md   = gr.Markdown(label="Summary")
+    bar_img = gr.Image(label="Importance bar chart", type="pil")
+    gr.Markdown("### 🎲 Sampled prompts  *(each word kept with p = importance)*")
+    samples_html = gr.HTML(label="Sampled prompts")
+    # The output order must match the tuple returned by analyse().
+    run_btn.click(
+        fn=analyse,
+        inputs=[prompt_box, threshold_slider, n_samples_slider],
+        outputs=[bar_img, threshold_html, threshold_md, samples_html],
+    )
+    # Each example row is [prompt, threshold, number of sampled prompts].
+    gr.Examples(
+        examples=[
+            ["a majestic lion in golden hour light, photorealistic, dramatic shadows", 0.3, 8],
+            ["cinematic portrait of a young woman, soft bokeh, rim lighting, film grain", 0.25, 8],
+            ["hyperrealistic macro photograph of a dewdrop on a spider web at dawn", 0.35, 10],
+            ["oil painting of a medieval castle surrounded by autumn forest", 0.3, 8],
+        ],
+        inputs=[prompt_box, threshold_slider, n_samples_slider],
+        fn=analyse,
+        outputs=[bar_img, threshold_html, threshold_md, samples_html],
+        cache_examples=False,
+    )
+# Runs at import time: there is no __main__ guard around the launch.
+demo.launch()

app_v1.py ADDED Viewed

	@@ -0,0 +1,382 @@

+import gradio as gr
+import torch
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib.colors import LinearSegmentedColormap
+from sentence_transformers import SentenceTransformer
+from abc import ABC, abstractmethod
+import io
+from PIL import Image
+# ─────────────────────────────────────────────
+#  Core importance evaluator (unchanged logic)
+# ─────────────────────────────────────────────
+def create_splits(p):
+    words = p.split()
+    omit_prompts = [
+        " ".join(w for i, w in enumerate(words) if i != j) for j in range(len(words))
+    ]
+    return words, omit_prompts
+class IE(ABC):
+    @abstractmethod
+    def get_word_importance_chunked(self, PROMPT):
+        pass
+class ImportanceEvaluatorStatic(IE):
+    def __init__(self):
+        self.CLIP_MODEL_ID = "sentence-transformers/static-retrieval-mrl-en-v1"
+        self.model = SentenceTransformer(self.CLIP_MODEL_ID)
+    def get_word_importance(self, PROMPT):
+        words, omit_prompts = create_splits(PROMPT)
+        sentences = [PROMPT] + omit_prompts
+        embeddings = self.model.encode(sentences)
+        similarities = self.model.similarity(embeddings[0:1], embeddings)
+        x = similarities[0]
+        x = -x.log()
+        x = x - x[0]
+        x = x.clamp(0)
+        if x.max() > 0:
+            x /= x.max()
+        return x[1:], words
+    def get_word_importance_chunked(self, PROMPT):
+        return self.get_word_importance(PROMPT)
+    def get_caption_embedding(self, PROMPT):
+        return self.model.encode(PROMPT)
+# ─────────────────────────────────────────────
+#  Load model once at startup
+# ─────────────────────────────────────────────
+_ie = None
+def get_evaluator():
+    global _ie
+    if _ie is None:
+        _ie = ImportanceEvaluatorStatic()
+    return _ie
+# ─────────────────────────────────────────────
+#  Plotting helpers
+# ─────────────────────────────────────────────
+# Hex colours shared by the matplotlib figures, the generated HTML and the CSS.
+PALETTE = {
+    "bg":        "#0d0f14",
+    "panel":     "#14171f",
+    "border":    "#1e2330",
+    "accent":    "#e8c547",
+    "accent2":   "#5bc4c0",
+    "text":      "#d4d8e8",
+    "muted":     "#5a6080",
+    "low":       "#2a3a5c",
+    "mid":       "#4a7c8c",
+    "high":      "#e8c547",
+    "critical":  "#e85f47",
+}
+# Four-stop colormap used to colour a value by its importance score. The stops
+# are the same hex codes as PALETTE's "low", "accent2", "high" and "critical".
+CMAP = LinearSegmentedColormap.from_list(
+    "imp", ["#2a3a5c", "#5bc4c0", "#e8c547", "#e85f47"], N=256
+)
+def _fig_to_pil(fig):
+    buf = io.BytesIO()
+    fig.savefig(buf, format="png", dpi=150, bbox_inches="tight",
+                facecolor=PALETTE["bg"])
+    buf.seek(0)
+    img = Image.open(buf).copy()
+    buf.close()
+    plt.close(fig)
+    return img
+def plot_importance_bars(words, importances, threshold=0.3):
+    """Horizontal bar chart coloured by importance with threshold line.
+
+    Args:
+        words: Labels for the bars, one per row.
+        importances: One numeric score per word; used as the bar length.
+        threshold: Position of the dashed vertical line; scores at or above
+            it get a marker and the accent colour.
+
+    Returns:
+        The rendered chart as a PIL image (via ``_fig_to_pil``).
+    """
+    n = len(words)
+    # Grow the figure with the number of words, but never below 3.5 inches.
+    fig_h = max(3.5, n * 0.38)
+    fig, ax = plt.subplots(figsize=(9, fig_h), facecolor=PALETTE["bg"])
+    ax.set_facecolor(PALETTE["panel"])
+    vals = np.array(importances)
+    colors = [CMAP(float(v)) for v in vals]
+    bars = ax.barh(range(n), vals, color=colors, edgecolor=PALETTE["border"],
+                   linewidth=0.6, height=0.65)
+    # threshold line
+    ax.axvline(threshold, color=PALETTE["accent"], linewidth=1.4,
+               linestyle="--", alpha=0.85, label=f"threshold = {threshold:.2f}")
+    # word labels
+    ax.set_yticks(range(n))
+    ax.set_yticklabels(words, fontsize=10, color=PALETTE["text"],
+                       fontfamily="monospace")
+    # Flip the axis so the first word is drawn at the top.
+    ax.invert_yaxis()
+    # value annotations
+    for i, (bar, v) in enumerate(zip(bars, vals)):
+        marker = "▶" if v >= threshold else ""
+        # The x position is capped at 1.05 so the label stays inside the
+        # 0..1.18 x-range set below.
+        ax.text(min(v + 0.02, 1.05), i, f"{v:.3f} {marker}",
+                va="center", fontsize=8.5,
+                color=PALETTE["accent"] if v >= threshold else PALETTE["muted"])
+    ax.set_xlim(0, 1.18)
+    ax.set_xlabel("Normalised importance", color=PALETTE["text"], fontsize=10)
+    ax.set_title("Word Importance  ·  drop-one analysis", color=PALETTE["text"],
+                 fontsize=12, fontweight="bold", pad=10)
+    ax.tick_params(colors=PALETTE["muted"], which="both")
+    for spine in ax.spines.values():
+        spine.set_edgecolor(PALETTE["border"])
+    ax.legend(facecolor=PALETTE["panel"], edgecolor=PALETTE["border"],
+              labelcolor=PALETTE["accent"], fontsize=9)
+    fig.tight_layout(pad=1.2)
+    return _fig_to_pil(fig)
+def plot_distribution(words, importances, n_samples=2000, seed=42):
+    """
+    Draw a violin-like strip per word from synthetic Gaussian jitter.
+
+    The spread is simulated, not measured: for each word, ``n_samples`` values
+    are drawn from a normal distribution centred on its importance score and
+    clipped to [0, 1]. No paraphrases are actually evaluated.
+
+    Args:
+        words: Labels for the x axis, one per strip.
+        importances: One score per word; the centre of that word's jitter.
+        n_samples: Number of random draws per word.
+        seed: Seed for the random generator, so the plot is reproducible.
+
+    Returns:
+        The rendered chart as a PIL image (via ``_fig_to_pil``).
+    """
+    rng = np.random.default_rng(seed)
+    n = len(words)
+    # Widen the figure with the number of words, but never below 6 inches.
+    fig, ax = plt.subplots(figsize=(max(6, n * 0.7 + 1), 5),
+                           facecolor=PALETTE["bg"])
+    ax.set_facecolor(PALETTE["panel"])
+    vals = np.array(importances, dtype=float)
+    for i, (word, v) in enumerate(zip(words, vals)):
+        # Jitter width proportional to value (higher = wider spread)
+        sigma = 0.04 + 0.08 * v
+        samples = rng.normal(loc=v, scale=sigma, size=n_samples).clip(0, 1)
+        # violin-like fill via histogram
+        hist, edges = np.histogram(samples, bins=40, density=True)
+        # Scale so the tallest bin reaches a half-width of 0.38 around x = i.
+        hist_norm = hist / hist.max() * 0.38
+        centers = (edges[:-1] + edges[1:]) / 2
+        color = CMAP(float(v))
+        ax.fill_betweenx(centers, i - hist_norm, i + hist_norm,
+                         color=color, alpha=0.55, linewidth=0)
+        # Very faint horizontal line across the strip at each bin centre.
+        ax.plot([i - hist_norm, i + hist_norm],
+                [centers, centers], color=color, alpha=0.05, linewidth=0.3)
+        # horizontal marker at the word's own score (the centre of the jitter,
+        # not a median computed from the samples)
+        ax.hlines(v, i - 0.35, i + 0.35, colors=PALETTE["accent"],
+                  linewidth=1.6, zorder=5)
+        # dot
+        ax.scatter([i], [v], color=PALETTE["accent"], s=28, zorder=6)
+    ax.set_xticks(range(n))
+    ax.set_xticklabels(words, rotation=35, ha="right", fontsize=9,
+                       color=PALETTE["text"], fontfamily="monospace")
+    ax.set_ylabel("Importance", color=PALETTE["text"], fontsize=10)
+    ax.set_title("Per-word Importance Distribution  (sampled spread)",
+                 color=PALETTE["text"], fontsize=12, fontweight="bold", pad=10)
+    ax.set_ylim(-0.05, 1.12)
+    ax.tick_params(colors=PALETTE["muted"])
+    for spine in ax.spines.values():
+        spine.set_edgecolor(PALETTE["border"])
+    fig.tight_layout(pad=1.2)
+    return _fig_to_pil(fig)
+def build_threshold_output(words, importances, threshold):
+    """Return highlighted HTML and plain text for above-threshold words."""
+    lines = []
+    above = []
+    for word, imp in zip(words, importances):
+        if imp >= threshold:
+            above.append(word)
+            style = (f"background:{PALETTE['accent']}22;"
+                     f"color:{PALETTE['accent']};"
+                     "border-radius:3px;padding:1px 4px;"
+                     "font-weight:700;font-family:monospace;")
+        else:
+            style = f"color:{PALETTE['muted']};font-family:monospace;"
+        lines.append(f'<span style="{style}">{word}</span>')
+    highlighted = (
+        f'<div style="background:{PALETTE["panel"]};padding:16px 20px;'
+        f'border-radius:8px;border:1px solid {PALETTE["border"]};'
+        f'line-height:2.1;font-size:15px;">'
+        + " ".join(lines)
+        + "</div>"
+    )
+    summary = (
+        f"**{len(above)} / {len(words)} words** above threshold {threshold:.2f}:\n\n"
+        + ", ".join(f"`{w}`" for w in above) if above else
+        "_No words exceed the threshold._"
+    )
+    return highlighted, summary
+# ─────────────────────────────────────────────
+#  Main inference function
+# ─────────────────────────────────────────────
+def analyse(prompt: str, threshold: float, n_dist_samples: int):
+    prompt = prompt.strip()
+    if not prompt:
+        return None, None, "<p>Please enter a prompt.</p>", ""
+    ie = get_evaluator()
+    # Compute per-line importances (multi-line support)
+    lines = [l for l in prompt.split("\n") if l.strip()]
+    all_words, all_imps = [], []
+    for line in lines:
+        result = ie.get_word_importance_chunked(line)
+        if result is not None:
+            imps, words = result
+            all_words.extend(words)
+            all_imps.extend(imps.tolist())
+    if not all_words:
+        return None, None, "<p>Could not parse prompt.</p>", ""
+    bar_img    = plot_importance_bars(all_words, all_imps, threshold)
+    dist_img   = plot_distribution(all_words, all_imps, n_samples=n_dist_samples)
+    highlighted, summary = build_threshold_output(all_words, all_imps, threshold)
+    return bar_img, dist_img, highlighted, summary
+# ─────────────────────────────────────────────
+#  Gradio UI
+# ─────────────────────────────────────────────
+# Stylesheet passed to gr.Blocks. It is an f-string so PALETTE colours can be
+# interpolated; the doubled braces are literal CSS braces.
+CSS = f"""
+@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:wght@300;400;600&display=swap');
+body, .gradio-container {{
+    background: {PALETTE['bg']} !important;
+    font-family: 'DM Sans', sans-serif !important;
+    color: {PALETTE['text']} !important;
+}}
+.gr-panel, .gr-box, .gr-form {{
+    background: {PALETTE['panel']} !important;
+    border: 1px solid {PALETTE['border']} !important;
+    border-radius: 10px !important;
+}}
+h1, h2, h3 {{
+    font-family: 'Space Mono', monospace !important;
+    color: {PALETTE['accent']} !important;
+    letter-spacing: -0.5px !important;
+}}
+.gr-button-primary {{
+    background: {PALETTE['accent']} !important;
+    color: {PALETTE['bg']} !important;
+    font-family: 'Space Mono', monospace !important;
+    font-weight: 700 !important;
+    border: none !important;
+    border-radius: 6px !important;
+}}
+.gr-button-primary:hover {{
+    opacity: 0.85 !important;
+}}
+label {{
+    color: {PALETTE['text']} !important;
+    font-size: 13px !important;
+    font-family: 'Space Mono', monospace !important;
+}}
+textarea, input[type=text] {{
+    background: {PALETTE['bg']} !important;
+    color: {PALETTE['text']} !important;
+    border: 1px solid {PALETTE['border']} !important;
+    font-family: 'Space Mono', monospace !important;
+    font-size: 13px !important;
+}}
+.markdown-text {{
+    color: {PALETTE['text']} !important;
+}}
+"""
+# Markdown shown at the top of the page.
+DESCRIPTION = """
+# 🔬 Word Importance Evaluator
+Drop-one embedding analysis using **static-retrieval-mrl-en-v1**.
+Each word's importance = semantic distance introduced by omitting it.
+Enter a prompt (multi-line supported), adjust the threshold, and explore:
+- **Bar chart** — ranked importance per word
+- **Distribution** — sampled spread per word
+- **Threshold filter** — highlight words above cutoff
+"""
+# Page layout: inputs in the wider left column, threshold output on the right,
+# then the two charts side by side underneath.
+with gr.Blocks(css=CSS, title="Word Importance Evaluator") as demo:
+    gr.Markdown(DESCRIPTION)
+    with gr.Row():
+        with gr.Column(scale=2):
+            prompt_box = gr.Textbox(
+                label="Prompt",
+                placeholder="a majestic lion in golden hour light, photorealistic, dramatic shadows",
+                lines=4,
+            )
+            with gr.Row():
+                threshold_slider = gr.Slider(
+                    minimum=0.0, maximum=1.0, value=0.3, step=0.01,
+                    label="Importance threshold",
+                )
+                n_samples_slider = gr.Slider(
+                    minimum=200, maximum=5000, value=1500, step=100,
+                    label="Distribution samples per word",
+                )
+            run_btn = gr.Button("Analyse →", variant="primary")
+        with gr.Column(scale=1):
+            threshold_html = gr.HTML(label="Threshold output")
+            threshold_md   = gr.Markdown(label="Summary")
+    with gr.Row():
+        bar_img  = gr.Image(label="Importance bar chart",  type="pil", height=500)
+        dist_img = gr.Image(label="Distribution per word", type="pil", height=500)
+    # The output order must match the tuple returned by analyse().
+    run_btn.click(
+        fn=analyse,
+        inputs=[prompt_box, threshold_slider, n_samples_slider],
+        outputs=[bar_img, dist_img, threshold_html, threshold_md],
+    )
+    # Each example row is [prompt, threshold, distribution samples per word].
+    gr.Examples(
+        examples=[
+            ["a majestic lion in golden hour light, photorealistic, dramatic shadows", 0.3, 1500],
+            ["cinematic portrait of a young woman, soft bokeh, rim lighting, film grain", 0.25, 1500],
+            ["hyperrealistic macro photograph of a dewdrop on a spider web at dawn", 0.35, 2000],
+            ["oil painting of a medieval castle surrounded by autumn forest", 0.3, 1500],
+        ],
+        inputs=[prompt_box, threshold_slider, n_samples_slider],
+        fn=analyse,
+        outputs=[bar_img, dist_img, threshold_html, threshold_md],
+        cache_examples=False,
+    )
+# Runs at import time: there is no __main__ guard around the launch.
+demo.launch()

prompt.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+turn the importance evaluator into a huggingface space. keep the relevant code unchanged. output should be importance barcharts and sample outputs with thresholding as well as distribution sampling per word
+--------------
+by distribution sampling i mean an output text where the importances are used as probabilities and they are included randomly according to that probability

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio>=4.44.0
+torch>=2.0.0
+sentence-transformers>=3.0.0
+numpy>=1.24.0
+matplotlib>=3.7.0
+Pillow>=10.0.0