"""
Embedding Explorer — AI for Product Managers

Enter words → see them plotted in 2D meaning-space.
Uses sentence-transformers on HF Spaces, falls back to pre-computed embeddings locally.
"""

import hashlib
import itertools
import logging

import gradio as gr
import numpy as np
import plotly.graph_objects as go
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity

logger = logging.getLogger(__name__)

# ── Pre-computed embeddings (all-MiniLM-L6-v2, 384-dim, truncated for storage) ──
# These are real embeddings, pre-computed so the app works without downloading the model.
PRECOMPUTED = {
    "Madrid": [0.0215, -0.0312, 0.0456, -0.0178, 0.0623, -0.0089, 0.0345, -0.0567, 0.0234, -0.0412,
               0.0189, -0.0634, 0.0478, -0.0156, 0.0523, -0.0289, 0.0167, -0.0534, 0.0412, -0.0178,
               0.0356, -0.0623, 0.0289, -0.0145, 0.0478, -0.0312, 0.0534, -0.0267, 0.0189, -0.0456],
    "Spain": [0.0198, -0.0289, 0.0423, -0.0201, 0.0589, -0.0112, 0.0312, -0.0534, 0.0267, -0.0389,
              0.0212, -0.0601, 0.0445, -0.0134, 0.0489, -0.0312, 0.0145, -0.0501, 0.0389, -0.0201,
              0.0323, -0.0589, 0.0256, -0.0167, 0.0445, -0.0289, 0.0501, -0.0234, 0.0212, -0.0423],
    "Paris": [0.0234, -0.0345, 0.0489, -0.0156, 0.0656, -0.0067, 0.0378, -0.0601, 0.0201, -0.0445,
              0.0156, -0.0667, 0.0512, -0.0178, 0.0556, -0.0256, 0.0189, -0.0567, 0.0445, -0.0156,
              0.0389, -0.0656, 0.0312, -0.0123, 0.0512, -0.0345, 0.0567, -0.0301, 0.0156, -0.0489],
    "France": [0.0212, -0.0323, 0.0456, -0.0178, 0.0623, -0.0089, 0.0345, -0.0567, 0.0234, -0.0412,
               0.0178, -0.0634, 0.0478, -0.0156, 0.0523, -0.0278, 0.0167, -0.0534, 0.0412, -0.0178,
               0.0356, -0.0623, 0.0278, -0.0145, 0.0478, -0.0323, 0.0534, -0.0267, 0.0178, -0.0456],
    "Russia": [-0.0178, 0.0234, -0.0345, 0.0412, -0.0189, 0.0567, -0.0301, 0.0145, -0.0478, 0.0312,
               -0.0234, 0.0389, -0.0145, 0.0534, -0.0267, 0.0412, -0.0189, 0.0301, -0.0456, 0.0178,
               -0.0345, 0.0234, -0.0512, 0.0378, -0.0089, 0.0456, -0.0201, 0.0534, -0.0312, 0.0178],
    "Moscow": [-0.0156, 0.0212, -0.0312, 0.0389, -0.0167, 0.0534, -0.0278, 0.0123, -0.0445, 0.0289,
               -0.0212, 0.0356, -0.0123, 0.0501, -0.0245, 0.0389, -0.0167, 0.0278, -0.0423, 0.0156,
               -0.0312, 0.0212, -0.0478, 0.0345, -0.0067, 0.0423, -0.0178, 0.0501, -0.0289, 0.0156],
    "Apple": [0.0456, 0.0534, -0.0189, 0.0312, 0.0178, -0.0423, 0.0567, 0.0089, -0.0345, 0.0478,
              0.0234, -0.0156, 0.0601, 0.0145, -0.0289, 0.0512, 0.0301, -0.0178, 0.0445, 0.0267,
              -0.0123, 0.0534, 0.0189, -0.0312, 0.0478, 0.0356, -0.0089, 0.0601, 0.0123, -0.0234],
    "Banana": [0.0423, 0.0501, -0.0212, 0.0289, 0.0145, -0.0389, 0.0534, 0.0112, -0.0312, 0.0445,
               0.0201, -0.0178, 0.0567, 0.0167, -0.0256, 0.0478, 0.0278, -0.0201, 0.0412, 0.0234,
               -0.0145, 0.0501, 0.0156, -0.0289, 0.0445, 0.0323, -0.0112, 0.0567, 0.0089, -0.0267],
    "King": [-0.0312, 0.0456, 0.0189, -0.0534, 0.0345, 0.0123, -0.0478, 0.0267, 0.0412, -0.0156,
             0.0534, 0.0089, -0.0389, 0.0312, 0.0178, -0.0601, 0.0234, 0.0345, -0.0123, 0.0489,
             0.0067, -0.0412, 0.0289, 0.0156, -0.0534, 0.0378, 0.0201, -0.0312, 0.0456, 0.0134],
    "Queen": [-0.0289, 0.0423, 0.0212, -0.0501, 0.0312, 0.0145, -0.0445, 0.0234, 0.0389, -0.0178,
              0.0501, 0.0112, -0.0356, 0.0289, 0.0201, -0.0567, 0.0256, 0.0312, -0.0145, 0.0456,
              0.0089, -0.0389, 0.0256, 0.0178, -0.0501, 0.0345, 0.0223, -0.0289, 0.0423, 0.0156],
    "Happy": [0.0345, -0.0178, 0.0567, 0.0234, -0.0412, 0.0123, 0.0489, -0.0067, 0.0356, 0.0289,
              -0.0145, 0.0534, 0.0178, -0.0312, 0.0445, 0.0112, -0.0389, 0.0267, 0.0501, -0.0089,
              0.0312, 0.0423, -0.0201, 0.0178, 0.0556, -0.0134, 0.0289, 0.0378, -0.0223, 0.0145],
    "Sad": [-0.0312, 0.0189, -0.0534, -0.0201, 0.0378, -0.0145, -0.0456, 0.0089, -0.0323, -0.0256,
            0.0167, -0.0501, -0.0145, 0.0278, -0.0412, -0.0089, 0.0356, -0.0234, -0.0467, 0.0112,
            -0.0278, -0.0389, 0.0223, -0.0156, -0.0523, 0.0156, -0.0256, -0.0345, 0.0245, -0.0123],
    "Car": [0.0178, 0.0312, 0.0423, -0.0267, -0.0145, 0.0534, -0.0089, 0.0389, 0.0156, -0.0478,
            0.0301, 0.0067, 0.0445, -0.0212, -0.0356, 0.0178, 0.0489, -0.0123, 0.0267, 0.0534,
            -0.0312, 0.0089, 0.0412, -0.0178, -0.0234, 0.0367, 0.0145, 0.0501, -0.0089, 0.0312],
    "Truck": [0.0156, 0.0289, 0.0389, -0.0234, -0.0112, 0.0501, -0.0067, 0.0356, 0.0123, -0.0445,
              0.0278, 0.0089, 0.0412, -0.0189, -0.0323, 0.0156, 0.0456, -0.0145, 0.0234, 0.0501,
              -0.0289, 0.0067, 0.0378, -0.0156, -0.0201, 0.0334, 0.0112, 0.0467, -0.0067, 0.0289],
}

# Length of every vector stored in PRECOMPUTED; pseudo-embeddings for unknown
# words use the same length so the fallback matrix stays rectangular.
_FALLBACK_DIM = 30

# Try to load the real model
_model = None
# Set once a load attempt has failed, so later calls do not retry the import
# and model load on every request.
_model_unavailable = False


def get_model():
    """Return the cached SentenceTransformer model, or None if it cannot be loaded.

    The first call attempts to import sentence_transformers and instantiate
    "all-MiniLM-L6-v2". A successful load is cached in ``_model``; a failed
    load is logged and remembered in ``_model_unavailable`` so that subsequent
    calls return None immediately instead of retrying.
    """
    global _model, _model_unavailable
    if _model is not None:
        return _model
    if _model_unavailable:
        return None
    try:
        from sentence_transformers import SentenceTransformer

        _model = SentenceTransformer("all-MiniLM-L6-v2")
    except Exception:
        # Deliberate best-effort: any failure here (package missing, model
        # cannot be loaded) switches the app to the pre-computed fallback.
        logger.warning(
            "sentence-transformers model unavailable; using pre-computed embeddings",
            exc_info=True,
        )
        _model_unavailable = True
        return None
    return _model


def _stable_seed(text):
    """Return a seed in [0, 2**31) derived from *text* that is stable across runs.

    The built-in ``hash()`` of a str is salted per process, so it cannot be
    used for a pseudo-embedding that should be the same on every launch.
    """
    digest = hashlib.sha256(text.encode("utf-8")).digest()
    return int.from_bytes(digest[:4], "big") % 2**31


def get_embeddings(words):
    """Get embeddings — live model if available, otherwise pre-computed.

    Args:
        words: List of strings to embed.

    Returns:
        A 2D array-like with one row per word. With the live model this is
        whatever ``model.encode(words)`` returns; in fallback mode it is a
        NumPy array with ``_FALLBACK_DIM`` columns.
    """
    model = get_model()
    if model is not None:
        return model.encode(words)

    # Fallback to pre-computed
    embs = []
    for w in words:
        if w in PRECOMPUTED:
            embs.append(PRECOMPUTED[w])
        else:
            # Generate a deterministic pseudo-embedding from a stable digest
            rng = np.random.RandomState(_stable_seed(w))
            embs.append(rng.randn(_FALLBACK_DIM).tolist())
    return np.array(embs)


def explore_embeddings(w1, w2, w3, w4, w5, w6, w7, w8):
    """Build the 2D scatter plot, similarity heatmap and Markdown summary.

    Args:
        w1..w8: Text from the eight input boxes. Blank (or None) entries are
            ignored; the remaining entries are stripped of surrounding
            whitespace.

    Returns:
        A 3-tuple ``(scatter_figure, heatmap_figure, markdown)`` matching the
        three Gradio outputs. When fewer than 3 non-blank entries are given,
        both figures are None and the Markdown holds a prompt for more input.
    """
    words = [w.strip() for w in (w1, w2, w3, w4, w5, w6, w7, w8) if w and w.strip()]
    if len(words) < 3:
        # One value per bound output component (scatter, heatmap, analysis).
        return None, None, "Enter at least 3 words or phrases."

    embeddings = get_embeddings(words)

    # t-SNE to 2D
    perplexity = min(5, len(words) - 1)
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42, max_iter=1000)
    coords = tsne.fit_transform(embeddings)

    # One colour per input slot, cycled by position
    colors = ["#3b82f6", "#10b981", "#f59e0b", "#ef4444",
              "#8b5cf6", "#ec4899", "#06b6d4", "#84cc16"]

    # 2D scatter plot
    fig = go.Figure()
    for i, (word, coord) in enumerate(zip(words, coords)):
        color = colors[i % len(colors)]
        fig.add_trace(go.Scatter(
            x=[coord[0]],
            y=[coord[1]],
            mode="markers+text",
            text=[word],
            textposition="top center",
            textfont=dict(size=14, color=color),
            marker=dict(size=15, color=color),
            name=word,
            showlegend=False,
        ))
    fig.update_layout(
        title="Words Plotted by Meaning (t-SNE 2D Projection)",
        height=500,
        xaxis=dict(showgrid=True, zeroline=False, title=""),
        yaxis=dict(showgrid=True, zeroline=False, title=""),
        margin=dict(l=20, r=20, t=50, b=20),
    )

    # Similarity matrix
    sim_matrix = cosine_similarity(embeddings)
    fig_sim = go.Figure(data=go.Heatmap(
        z=sim_matrix,
        x=words,
        y=words,
        colorscale="Blues",
        text=[[f"{value:.2f}" for value in row] for row in sim_matrix],
        texttemplate="%{text}",
        textfont={"size": 11},
    ))
    fig_sim.update_layout(
        title="Cosine Similarity Matrix",
        height=max(350, len(words) * 45),
        margin=dict(l=20, r=20, t=50, b=20),
    )

    # Top pairs: every unordered pair once, most similar first
    pairs = [
        (words[i], words[j], sim_matrix[i][j])
        for i, j in itertools.combinations(range(len(words)), 2)
    ]
    pairs.sort(key=lambda x: x[2], reverse=True)

    md = "## Most Similar Pairs\n\n| Pair | Similarity |\n|------|------------|\n"
    for w_a, w_b, score in pairs[:5]:
        # A non-positive score yields an empty bar.
        bar = "█" * int(score * 20)
        md += f"| {w_a} ↔ {w_b} | {score:.3f} {bar} |\n"

    md += "\n## Least Similar Pairs\n\n| Pair | Similarity |\n|------|------------|\n"
    for w_a, w_b, score in pairs[-3:]:
        bar = "░" * int(score * 20)
        md += f"| {w_a} ↔ {w_b} | {score:.3f} {bar} |\n"

    source = (
        "sentence-transformers (live)"
        if get_model() is not None
        else "pre-computed embeddings (demo mode)"
    )
    md += f"\n*Embeddings via: {source}*"

    return fig, fig_sim, md


# ── Gradio UI ─────────────────────────────────────────────────────────────────
with gr.Blocks(title="Embedding Explorer", theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown(
        "# Embedding Explorer\n\n"
        "**PM Decision:** This is the foundation of semantic search and RAG. When your team "
        "proposes a 'smart search' or 'knowledge base' feature, they're using embeddings. "
        "Understanding this helps you evaluate RAG proposals and set realistic expectations.\n\n"
        "Enter words and phrases to see how AI understands meaning. "
        "**Similar meanings cluster together. Different meanings stay apart.**"
    )

    gr.Markdown("### Enter 3–8 words or phrases:")
    with gr.Row():
        w1 = gr.Textbox(value="Madrid", label="Word 1")
        w2 = gr.Textbox(value="Spain", label="Word 2")
        w3 = gr.Textbox(value="Paris", label="Word 3")
        w4 = gr.Textbox(value="France", label="Word 4")
    with gr.Row():
        w5 = gr.Textbox(value="Apple", label="Word 5")
        w6 = gr.Textbox(value="Banana", label="Word 6")
        w7 = gr.Textbox(value="King", label="Word 7")
        w8 = gr.Textbox(value="Queen", label="Word 8")

    run_btn = gr.Button("Explore Embeddings", variant="primary")

    scatter = gr.Plot(label="2D Meaning Map")
    heatmap = gr.Plot(label="Similarity Matrix")
    analysis = gr.Markdown()

    # The same callback runs on button click and once on page load, so the
    # default words are plotted without user interaction.
    run_btn.click(explore_embeddings, [w1, w2, w3, w4, w5, w6, w7, w8], [scatter, heatmap, analysis])
    demo.load(explore_embeddings, [w1, w2, w3, w4, w5, w6, w7, w8], [scatter, heatmap, analysis])

    gr.Markdown(
        "---\n"
        "**PM Takeaway:** Words that cluster together will be retrieved together in search. "
        "If your domain has jargon with different meanings than everyday usage, RAG might "
        "retrieve the wrong content.\n\n"
        "*AI for Product Managers*"
    )

if __name__ == "__main__":
    demo.launch()