Spaces:
Build error
Build error
Upload folder using huggingface_hub
Browse files- README.md +8 -4
- app.py +177 -0
- requirements.txt +5 -0
README.md
CHANGED
|
@@ -1,12 +1,16 @@
|
|
| 1 |
---
|
| 2 |
title: Embedding Explorer
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Embedding Explorer
|
| 3 |
+
emoji: 🗺️
|
| 4 |
+
colorFrom: blue
|
| 5 |
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: "4.44.0"
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Embedding Explorer
|
| 13 |
+
|
| 14 |
+
Enter words and phrases, see them plotted in 2D meaning-space. Understand how embeddings power semantic search, RAG, and recommendations.
|
| 15 |
+
|
| 16 |
+
Part of the **AI for Product Managers** course by Data Trainers LLC.
|
app.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Embedding Explorer β AI for Product Managers
|
| 3 |
+
Enter words β see them plotted in 2D meaning-space.
|
| 4 |
+
Uses sentence-transformers on HF Spaces, falls back to pre-computed embeddings locally.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import zlib

import gradio as gr
import numpy as np
import plotly.graph_objects as go
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
|
| 12 |
+
|
| 13 |
+
# ── Pre-computed fallback embeddings ──────────────────────────────────────────
# Used when the sentence-transformers model cannot be loaded (offline/demo
# mode).  Per the original note these derive from all-MiniLM-L6-v2; note the
# vectors stored here are 30-dimensional — presumably truncated from the
# model's 384 dims for storage, as the original comment said (TODO confirm).
# Keys match the default words pre-filled in the UI textboxes.

PRECOMPUTED = {
    "Madrid": [0.0215, -0.0312, 0.0456, -0.0178, 0.0623, -0.0089, 0.0345, -0.0567, 0.0234, -0.0412, 0.0189, -0.0634, 0.0478, -0.0156, 0.0523, -0.0289, 0.0167, -0.0534, 0.0412, -0.0178, 0.0356, -0.0623, 0.0289, -0.0145, 0.0478, -0.0312, 0.0534, -0.0267, 0.0189, -0.0456],
    "Spain": [0.0198, -0.0289, 0.0423, -0.0201, 0.0589, -0.0112, 0.0312, -0.0534, 0.0267, -0.0389, 0.0212, -0.0601, 0.0445, -0.0134, 0.0489, -0.0312, 0.0145, -0.0501, 0.0389, -0.0201, 0.0323, -0.0589, 0.0256, -0.0167, 0.0445, -0.0289, 0.0501, -0.0234, 0.0212, -0.0423],
    "Paris": [0.0234, -0.0345, 0.0489, -0.0156, 0.0656, -0.0067, 0.0378, -0.0601, 0.0201, -0.0445, 0.0156, -0.0667, 0.0512, -0.0178, 0.0556, -0.0256, 0.0189, -0.0567, 0.0445, -0.0156, 0.0389, -0.0656, 0.0312, -0.0123, 0.0512, -0.0345, 0.0567, -0.0301, 0.0156, -0.0489],
    "France": [0.0212, -0.0323, 0.0456, -0.0178, 0.0623, -0.0089, 0.0345, -0.0567, 0.0234, -0.0412, 0.0178, -0.0634, 0.0478, -0.0156, 0.0523, -0.0278, 0.0167, -0.0534, 0.0412, -0.0178, 0.0356, -0.0623, 0.0278, -0.0145, 0.0478, -0.0323, 0.0534, -0.0267, 0.0178, -0.0456],
    "Russia": [-0.0178, 0.0234, -0.0345, 0.0412, -0.0189, 0.0567, -0.0301, 0.0145, -0.0478, 0.0312, -0.0234, 0.0389, -0.0145, 0.0534, -0.0267, 0.0412, -0.0189, 0.0301, -0.0456, 0.0178, -0.0345, 0.0234, -0.0512, 0.0378, -0.0089, 0.0456, -0.0201, 0.0534, -0.0312, 0.0178],
    "Moscow": [-0.0156, 0.0212, -0.0312, 0.0389, -0.0167, 0.0534, -0.0278, 0.0123, -0.0445, 0.0289, -0.0212, 0.0356, -0.0123, 0.0501, -0.0245, 0.0389, -0.0167, 0.0278, -0.0423, 0.0156, -0.0312, 0.0212, -0.0478, 0.0345, -0.0067, 0.0423, -0.0178, 0.0501, -0.0289, 0.0156],
    "Apple": [0.0456, 0.0534, -0.0189, 0.0312, 0.0178, -0.0423, 0.0567, 0.0089, -0.0345, 0.0478, 0.0234, -0.0156, 0.0601, 0.0145, -0.0289, 0.0512, 0.0301, -0.0178, 0.0445, 0.0267, -0.0123, 0.0534, 0.0189, -0.0312, 0.0478, 0.0356, -0.0089, 0.0601, 0.0123, -0.0234],
    "Banana": [0.0423, 0.0501, -0.0212, 0.0289, 0.0145, -0.0389, 0.0534, 0.0112, -0.0312, 0.0445, 0.0201, -0.0178, 0.0567, 0.0167, -0.0256, 0.0478, 0.0278, -0.0201, 0.0412, 0.0234, -0.0145, 0.0501, 0.0156, -0.0289, 0.0445, 0.0323, -0.0112, 0.0567, 0.0089, -0.0267],
    "King": [-0.0312, 0.0456, 0.0189, -0.0534, 0.0345, 0.0123, -0.0478, 0.0267, 0.0412, -0.0156, 0.0534, 0.0089, -0.0389, 0.0312, 0.0178, -0.0601, 0.0234, 0.0345, -0.0123, 0.0489, 0.0067, -0.0412, 0.0289, 0.0156, -0.0534, 0.0378, 0.0201, -0.0312, 0.0456, 0.0134],
    "Queen": [-0.0289, 0.0423, 0.0212, -0.0501, 0.0312, 0.0145, -0.0445, 0.0234, 0.0389, -0.0178, 0.0501, 0.0112, -0.0356, 0.0289, 0.0201, -0.0567, 0.0256, 0.0312, -0.0145, 0.0456, 0.0089, -0.0389, 0.0256, 0.0178, -0.0501, 0.0345, 0.0223, -0.0289, 0.0423, 0.0156],
    "Happy": [0.0345, -0.0178, 0.0567, 0.0234, -0.0412, 0.0123, 0.0489, -0.0067, 0.0356, 0.0289, -0.0145, 0.0534, 0.0178, -0.0312, 0.0445, 0.0112, -0.0389, 0.0267, 0.0501, -0.0089, 0.0312, 0.0423, -0.0201, 0.0178, 0.0556, -0.0134, 0.0289, 0.0378, -0.0223, 0.0145],
    "Sad": [-0.0312, 0.0189, -0.0534, -0.0201, 0.0378, -0.0145, -0.0456, 0.0089, -0.0323, -0.0256, 0.0167, -0.0501, -0.0145, 0.0278, -0.0412, -0.0089, 0.0356, -0.0234, -0.0467, 0.0112, -0.0278, -0.0389, 0.0223, -0.0156, -0.0523, 0.0156, -0.0256, -0.0345, 0.0245, -0.0123],
    "Car": [0.0178, 0.0312, 0.0423, -0.0267, -0.0145, 0.0534, -0.0089, 0.0389, 0.0156, -0.0478, 0.0301, 0.0067, 0.0445, -0.0212, -0.0356, 0.0178, 0.0489, -0.0123, 0.0267, 0.0534, -0.0312, 0.0089, 0.0412, -0.0178, -0.0234, 0.0367, 0.0145, 0.0501, -0.0089, 0.0312],
    "Truck": [0.0156, 0.0289, 0.0389, -0.0234, -0.0112, 0.0501, -0.0067, 0.0356, 0.0123, -0.0445, 0.0278, 0.0089, 0.0412, -0.0189, -0.0323, 0.0156, 0.0456, -0.0145, 0.0234, 0.0501, -0.0289, 0.0067, 0.0378, -0.0156, -0.0201, 0.0334, 0.0112, 0.0467, -0.0067, 0.0289],
}
|
| 32 |
+
|
| 33 |
+
# Try to load the real model
|
| 34 |
+
_model = None
|
| 35 |
+
|
| 36 |
+
def get_model():
|
| 37 |
+
global _model
|
| 38 |
+
if _model is not None:
|
| 39 |
+
return _model
|
| 40 |
+
try:
|
| 41 |
+
from sentence_transformers import SentenceTransformer
|
| 42 |
+
_model = SentenceTransformer("all-MiniLM-L6-v2")
|
| 43 |
+
return _model
|
| 44 |
+
except Exception:
|
| 45 |
+
return None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def get_embeddings(words):
    """Return an ``(len(words), dim)`` array of embeddings for *words*.

    Uses the live sentence-transformers model when available; otherwise falls
    back to the pre-computed vectors in ``PRECOMPUTED``, synthesizing a
    reproducible pseudo-embedding for any word not found there.
    """
    model = get_model()
    if model is not None:
        return model.encode(words)

    # Fallback to pre-computed vectors.
    embs = []
    for w in words:
        if w in PRECOMPUTED:
            embs.append(PRECOMPUTED[w])
        else:
            # Bug fix: the builtin hash() is salted per interpreter run
            # (PYTHONHASHSEED), so the old seed was NOT deterministic across
            # restarts.  zlib.crc32 is stable everywhere.
            seed = zlib.crc32(w.encode("utf-8")) % 2**31
            rng = np.random.RandomState(seed)
            embs.append(rng.randn(30).tolist())
    return np.array(embs)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def explore_embeddings(w1, w2, w3, w4, w5, w6, w7, w8):
    """Embed the non-empty inputs and visualize them.

    Returns a 3-tuple matching the Gradio output components:
    ``(scatter_figure, heatmap_figure, markdown_summary)``.  Blank inputs are
    ignored; at least 3 usable words are required.
    """
    words = [w.strip() for w in [w1, w2, w3, w4, w5, w6, w7, w8] if w.strip()]
    if len(words) < 3:
        # Bug fix: the event handler feeds THREE output components, so the
        # early exit must return three values (the original returned two,
        # which makes Gradio error out on short input).
        return None, None, "Enter at least 3 words or phrases."

    embeddings = get_embeddings(words)

    # t-SNE to 2D; perplexity must stay strictly below the sample count.
    perplexity = min(5, len(words) - 1)
    try:
        # scikit-learn renamed n_iter -> max_iter (deprecated in 1.5,
        # removed in 1.7), so the old keyword raises TypeError on current
        # installs.  Prefer the new spelling, fall back for old versions.
        tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42, max_iter=1000)
    except TypeError:
        tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42, n_iter=1000)
    coords = tsne.fit_transform(embeddings)

    # Fixed palette, cycled per word (not derived from any clustering).
    colors = ["#3b82f6", "#10b981", "#f59e0b", "#ef4444", "#8b5cf6", "#ec4899", "#06b6d4", "#84cc16"]

    # 2D scatter plot: one labelled marker per word.
    fig = go.Figure()
    for i, (word, coord) in enumerate(zip(words, coords)):
        fig.add_trace(go.Scatter(
            x=[coord[0]], y=[coord[1]],
            mode="markers+text",
            text=[word],
            textposition="top center",
            textfont=dict(size=14, color=colors[i % len(colors)]),
            marker=dict(size=15, color=colors[i % len(colors)]),
            name=word,
            showlegend=False
        ))
    fig.update_layout(
        title="Words Plotted by Meaning (t-SNE 2D Projection)",
        height=500,
        xaxis=dict(showgrid=True, zeroline=False, title=""),
        yaxis=dict(showgrid=True, zeroline=False, title=""),
        margin=dict(l=20, r=20, t=50, b=20)
    )

    # Pairwise cosine similarity, rendered as an annotated heatmap.
    sim_matrix = cosine_similarity(embeddings)

    fig_sim = go.Figure(data=go.Heatmap(
        z=sim_matrix,
        x=words,
        y=words,
        colorscale="Blues",
        text=[[f"{sim_matrix[i][j]:.2f}" for j in range(len(words))] for i in range(len(words))],
        texttemplate="%{text}",
        textfont={"size": 11},
    ))
    fig_sim.update_layout(
        title="Cosine Similarity Matrix",
        height=max(350, len(words) * 45),
        margin=dict(l=20, r=20, t=50, b=20)
    )

    # Rank every unordered pair by similarity for the markdown summary.
    pairs = []
    for i in range(len(words)):
        for j in range(i + 1, len(words)):
            pairs.append((words[i], words[j], sim_matrix[i][j]))
    pairs.sort(key=lambda x: x[2], reverse=True)

    # Mojibake repair: the bar and arrow glyphs in the original were garbled
    # to "β" by a bad encoding round-trip; restored to "█" and "↔".
    md = "## Most Similar Pairs\n\n| Pair | Similarity |\n|------|------------|\n"
    for w_a, w_b, score in pairs[:5]:
        bar = "█" * int(score * 20)  # a negative score simply yields no bar
        md += f"| {w_a} ↔ {w_b} | {score:.3f} {bar} |\n"

    md += "\n## Least Similar Pairs\n\n| Pair | Similarity |\n|------|------------|\n"
    for w_a, w_b, score in pairs[-3:]:
        bar = "█" * int(score * 20)
        md += f"| {w_a} ↔ {w_b} | {score:.3f} {bar} |\n"

    source = "sentence-transformers (live)" if get_model() is not None else "pre-computed embeddings (demo mode)"
    md += f"\n*Embeddings via: {source}*"

    return fig, fig_sim, md
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
# ── Gradio UI ─────────────────────────────────────────────────────────────────

with gr.Blocks(title="Embedding Explorer", theme=gr.themes.Soft(primary_hue="blue")) as demo:
    # Header / value proposition.
    gr.Markdown(
        "# Embedding Explorer\n"
        "Enter words and phrases to see how AI understands meaning.\n"
        "**Similar meanings cluster together. Different meanings stay apart.**"
    )

    # Eight free-text inputs; explore_embeddings() ignores blanks and
    # requires at least three usable entries.
    # Mojibake fix: the visible range read "3β8"; restored the en dash.
    gr.Markdown("### Enter 3–8 words or phrases:")
    with gr.Row():
        w1 = gr.Textbox(value="Madrid", label="Word 1")
        w2 = gr.Textbox(value="Spain", label="Word 2")
        w3 = gr.Textbox(value="Paris", label="Word 3")
        w4 = gr.Textbox(value="France", label="Word 4")
    with gr.Row():
        w5 = gr.Textbox(value="Apple", label="Word 5")
        w6 = gr.Textbox(value="Banana", label="Word 6")
        w7 = gr.Textbox(value="King", label="Word 7")
        w8 = gr.Textbox(value="Queen", label="Word 8")

    run_btn = gr.Button("Explore Embeddings", variant="primary")

    # Output components, in the order explore_embeddings() returns them.
    scatter = gr.Plot(label="2D Meaning Map")
    heatmap = gr.Plot(label="Similarity Matrix")
    analysis = gr.Markdown()

    run_btn.click(explore_embeddings, [w1, w2, w3, w4, w5, w6, w7, w8], [scatter, heatmap, analysis])
    # Pre-populate the plots with the default words on page load.
    demo.load(explore_embeddings, [w1, w2, w3, w4, w5, w6, w7, w8], [scatter, heatmap, analysis])

    gr.Markdown("---\n*Part of the AI for Product Managers course by Data Trainers LLC*")

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0
|
| 2 |
+
sentence-transformers
|
| 3 |
+
scikit-learn
|
| 4 |
+
plotly
|
| 5 |
+
numpy
|