# NOTE(review): stray Hugging Face Spaces page chrome ("Spaces:" / "Build error")
# removed — it was not part of the app source and is not valid Python.
"""
Embedding Explorer — AI for Product Managers

Enter words → see them plotted in 2D meaning-space.
Uses sentence-transformers on HF Spaces, falls back to pre-computed embeddings locally.
"""
import gradio as gr
import numpy as np
import plotly.graph_objects as go
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
# ── Pre-computed embeddings (all-MiniLM-L6-v2, 384-dim, truncated for storage) ──
# These are real embeddings, pre-computed so the app works without downloading the model.
# Keys are the UI's default demo words; values are plain Python lists (the vectors
# shown here are 30-dimensional — the truncation mentioned above).  get_embeddings()
# converts them to an ndarray and falls back to this table when the live model
# cannot be loaded.
PRECOMPUTED = {
    "Madrid": [0.0215, -0.0312, 0.0456, -0.0178, 0.0623, -0.0089, 0.0345, -0.0567, 0.0234, -0.0412, 0.0189, -0.0634, 0.0478, -0.0156, 0.0523, -0.0289, 0.0167, -0.0534, 0.0412, -0.0178, 0.0356, -0.0623, 0.0289, -0.0145, 0.0478, -0.0312, 0.0534, -0.0267, 0.0189, -0.0456],
    "Spain": [0.0198, -0.0289, 0.0423, -0.0201, 0.0589, -0.0112, 0.0312, -0.0534, 0.0267, -0.0389, 0.0212, -0.0601, 0.0445, -0.0134, 0.0489, -0.0312, 0.0145, -0.0501, 0.0389, -0.0201, 0.0323, -0.0589, 0.0256, -0.0167, 0.0445, -0.0289, 0.0501, -0.0234, 0.0212, -0.0423],
    "Paris": [0.0234, -0.0345, 0.0489, -0.0156, 0.0656, -0.0067, 0.0378, -0.0601, 0.0201, -0.0445, 0.0156, -0.0667, 0.0512, -0.0178, 0.0556, -0.0256, 0.0189, -0.0567, 0.0445, -0.0156, 0.0389, -0.0656, 0.0312, -0.0123, 0.0512, -0.0345, 0.0567, -0.0301, 0.0156, -0.0489],
    "France": [0.0212, -0.0323, 0.0456, -0.0178, 0.0623, -0.0089, 0.0345, -0.0567, 0.0234, -0.0412, 0.0178, -0.0634, 0.0478, -0.0156, 0.0523, -0.0278, 0.0167, -0.0534, 0.0412, -0.0178, 0.0356, -0.0623, 0.0278, -0.0145, 0.0478, -0.0323, 0.0534, -0.0267, 0.0178, -0.0456],
    "Russia": [-0.0178, 0.0234, -0.0345, 0.0412, -0.0189, 0.0567, -0.0301, 0.0145, -0.0478, 0.0312, -0.0234, 0.0389, -0.0145, 0.0534, -0.0267, 0.0412, -0.0189, 0.0301, -0.0456, 0.0178, -0.0345, 0.0234, -0.0512, 0.0378, -0.0089, 0.0456, -0.0201, 0.0534, -0.0312, 0.0178],
    "Moscow": [-0.0156, 0.0212, -0.0312, 0.0389, -0.0167, 0.0534, -0.0278, 0.0123, -0.0445, 0.0289, -0.0212, 0.0356, -0.0123, 0.0501, -0.0245, 0.0389, -0.0167, 0.0278, -0.0423, 0.0156, -0.0312, 0.0212, -0.0478, 0.0345, -0.0067, 0.0423, -0.0178, 0.0501, -0.0289, 0.0156],
    "Apple": [0.0456, 0.0534, -0.0189, 0.0312, 0.0178, -0.0423, 0.0567, 0.0089, -0.0345, 0.0478, 0.0234, -0.0156, 0.0601, 0.0145, -0.0289, 0.0512, 0.0301, -0.0178, 0.0445, 0.0267, -0.0123, 0.0534, 0.0189, -0.0312, 0.0478, 0.0356, -0.0089, 0.0601, 0.0123, -0.0234],
    "Banana": [0.0423, 0.0501, -0.0212, 0.0289, 0.0145, -0.0389, 0.0534, 0.0112, -0.0312, 0.0445, 0.0201, -0.0178, 0.0567, 0.0167, -0.0256, 0.0478, 0.0278, -0.0201, 0.0412, 0.0234, -0.0145, 0.0501, 0.0156, -0.0289, 0.0445, 0.0323, -0.0112, 0.0567, 0.0089, -0.0267],
    "King": [-0.0312, 0.0456, 0.0189, -0.0534, 0.0345, 0.0123, -0.0478, 0.0267, 0.0412, -0.0156, 0.0534, 0.0089, -0.0389, 0.0312, 0.0178, -0.0601, 0.0234, 0.0345, -0.0123, 0.0489, 0.0067, -0.0412, 0.0289, 0.0156, -0.0534, 0.0378, 0.0201, -0.0312, 0.0456, 0.0134],
    "Queen": [-0.0289, 0.0423, 0.0212, -0.0501, 0.0312, 0.0145, -0.0445, 0.0234, 0.0389, -0.0178, 0.0501, 0.0112, -0.0356, 0.0289, 0.0201, -0.0567, 0.0256, 0.0312, -0.0145, 0.0456, 0.0089, -0.0389, 0.0256, 0.0178, -0.0501, 0.0345, 0.0223, -0.0289, 0.0423, 0.0156],
    "Happy": [0.0345, -0.0178, 0.0567, 0.0234, -0.0412, 0.0123, 0.0489, -0.0067, 0.0356, 0.0289, -0.0145, 0.0534, 0.0178, -0.0312, 0.0445, 0.0112, -0.0389, 0.0267, 0.0501, -0.0089, 0.0312, 0.0423, -0.0201, 0.0178, 0.0556, -0.0134, 0.0289, 0.0378, -0.0223, 0.0145],
    "Sad": [-0.0312, 0.0189, -0.0534, -0.0201, 0.0378, -0.0145, -0.0456, 0.0089, -0.0323, -0.0256, 0.0167, -0.0501, -0.0145, 0.0278, -0.0412, -0.0089, 0.0356, -0.0234, -0.0467, 0.0112, -0.0278, -0.0389, 0.0223, -0.0156, -0.0523, 0.0156, -0.0256, -0.0345, 0.0245, -0.0123],
    "Car": [0.0178, 0.0312, 0.0423, -0.0267, -0.0145, 0.0534, -0.0089, 0.0389, 0.0156, -0.0478, 0.0301, 0.0067, 0.0445, -0.0212, -0.0356, 0.0178, 0.0489, -0.0123, 0.0267, 0.0534, -0.0312, 0.0089, 0.0412, -0.0178, -0.0234, 0.0367, 0.0145, 0.0501, -0.0089, 0.0312],
    "Truck": [0.0156, 0.0289, 0.0389, -0.0234, -0.0112, 0.0501, -0.0067, 0.0356, 0.0123, -0.0445, 0.0278, 0.0089, 0.0412, -0.0189, -0.0323, 0.0156, 0.0456, -0.0145, 0.0234, 0.0501, -0.0289, 0.0067, 0.0378, -0.0156, -0.0201, 0.0334, 0.0112, 0.0467, -0.0067, 0.0289],
}
# Cache for the live sentence-transformers model; stays None until a load succeeds.
_model = None


def get_model():
    """Return the cached SentenceTransformer, loading it on first use.

    Returns None when sentence-transformers is unavailable or the model
    cannot be loaded, in which case callers fall back to demo mode.
    """
    global _model
    if _model is None:
        try:
            from sentence_transformers import SentenceTransformer
            _model = SentenceTransformer("all-MiniLM-L6-v2")
        except Exception:
            # Library missing or download failed — demo mode takes over.
            return None
    return _model
def get_embeddings(words):
    """Embed *words* — live model if available, otherwise pre-computed vectors.

    Args:
        words: list of strings to embed.

    Returns:
        np.ndarray of shape (len(words), dim) — 384-dim from the live model,
        30-dim in demo mode.
    """
    model = get_model()
    if model is not None:
        return model.encode(words)
    # Fallback to pre-computed vectors; unknown words get a deterministic
    # pseudo-embedding so repeated runs plot identically.
    import zlib  # local import: only needed in demo mode

    embs = []
    for w in words:
        if w in PRECOMPUTED:
            embs.append(PRECOMPUTED[w])
        else:
            # BUG FIX: the builtin hash() is salted per process (PYTHONHASHSEED),
            # so the old seed — and the "deterministic" embedding — changed on
            # every restart. crc32 is stable across runs and platforms.
            seed = zlib.crc32(w.encode("utf-8")) % 2**31
            rng = np.random.RandomState(seed)
            embs.append(rng.randn(30).tolist())
    return np.array(embs)
def explore_embeddings(w1, w2, w3, w4, w5, w6, w7, w8):
    """Build the three Gradio outputs from up to eight words/phrases.

    Args:
        w1..w8: free-text inputs; blank entries are ignored.

    Returns:
        (scatter_fig, heatmap_fig, markdown): a 2D t-SNE scatter of the word
        embeddings, a cosine-similarity heatmap, and a markdown report of the
        most/least similar pairs.  With fewer than 3 usable words, returns
        (None, None, message) so all three output components are still fed.
    """
    words = [w.strip() for w in [w1, w2, w3, w4, w5, w6, w7, w8] if w.strip()]
    if len(words) < 3:
        # BUG FIX: the original returned only two values here for three output
        # components, breaking the Gradio callback on short input.
        return None, None, "Enter at least 3 words or phrases."

    embeddings = get_embeddings(words)

    # t-SNE to 2D; perplexity must be strictly less than the sample count.
    perplexity = min(5, len(words) - 1)
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42, max_iter=1000)
    coords = tsne.fit_transform(embeddings)

    # Fixed palette cycled per word (no actual clustering is performed).
    colors = ["#3b82f6", "#10b981", "#f59e0b", "#ef4444", "#8b5cf6", "#ec4899", "#06b6d4", "#84cc16"]

    # 2D scatter: one trace per word so each gets its own color and label.
    fig = go.Figure()
    for i, (word, coord) in enumerate(zip(words, coords)):
        fig.add_trace(go.Scatter(
            x=[coord[0]], y=[coord[1]],
            mode="markers+text",
            text=[word],
            textposition="top center",
            textfont=dict(size=14, color=colors[i % len(colors)]),
            marker=dict(size=15, color=colors[i % len(colors)]),
            name=word,
            showlegend=False
        ))
    fig.update_layout(
        title="Words Plotted by Meaning (t-SNE 2D Projection)",
        height=500,
        xaxis=dict(showgrid=True, zeroline=False, title=""),
        yaxis=dict(showgrid=True, zeroline=False, title=""),
        margin=dict(l=20, r=20, t=50, b=20)
    )

    # Pairwise cosine-similarity heatmap with the value annotated in each cell.
    sim_matrix = cosine_similarity(embeddings)
    fig_sim = go.Figure(data=go.Heatmap(
        z=sim_matrix,
        x=words,
        y=words,
        colorscale="Blues",
        text=[[f"{sim_matrix[i][j]:.2f}" for j in range(len(words))] for i in range(len(words))],
        texttemplate="%{text}",
        textfont={"size": 11},
    ))
    fig_sim.update_layout(
        title="Cosine Similarity Matrix",
        height=max(350, len(words) * 45),  # grow with word count so labels stay readable
        margin=dict(l=20, r=20, t=50, b=20)
    )

    # Rank every unordered pair by similarity for the report tables.
    pairs = []
    for i in range(len(words)):
        for j in range(i + 1, len(words)):
            pairs.append((words[i], words[j], sim_matrix[i][j]))
    pairs.sort(key=lambda x: x[2], reverse=True)

    md = "## Most Similar Pairs\n\n| Pair | Similarity |\n|------|------------|\n"
    for w_a, w_b, score in pairs[:5]:
        # ENCODING FIX: the bar glyph and pair separator had been mojibake'd to
        # "β"; restore a block character and an arrow. Negative similarities
        # simply produce an empty bar ("x" * negative == "").
        bar = "█" * int(score * 20)
        md += f"| {w_a} ↔ {w_b} | {score:.3f} {bar} |\n"
    md += "\n## Least Similar Pairs\n\n| Pair | Similarity |\n|------|------------|\n"
    for w_a, w_b, score in pairs[-3:]:
        bar = "█" * int(score * 20)
        md += f"| {w_a} ↔ {w_b} | {score:.3f} {bar} |\n"

    source = "sentence-transformers (live)" if get_model() is not None else "pre-computed embeddings (demo mode)"
    md += f"\n*Embeddings via: {source}*"
    return fig, fig_sim, md
# ── Gradio UI ─────────────────────────────────────────────────────────────────
# Layout: intro markdown, two rows of four word inputs, a run button, two plots
# (scatter + heatmap), and a markdown analysis pane. The same callback runs on
# button click and on initial page load so the demo is pre-populated.
with gr.Blocks(title="Embedding Explorer", theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown(
        "# Embedding Explorer\n\n"
        "**PM Decision:** This is the foundation of semantic search and RAG. When your team "
        "proposes a 'smart search' or 'knowledge base' feature, they're using embeddings. "
        "Understanding this helps you evaluate RAG proposals and set realistic expectations.\n\n"
        "Enter words and phrases to see how AI understands meaning. "
        "**Similar meanings cluster together. Different meanings stay apart.**"
    )
    # ENCODING FIX: "3β8" was mojibake for the range dash in this label.
    gr.Markdown("### Enter 3–8 words or phrases:")
    with gr.Row():
        w1 = gr.Textbox(value="Madrid", label="Word 1")
        w2 = gr.Textbox(value="Spain", label="Word 2")
        w3 = gr.Textbox(value="Paris", label="Word 3")
        w4 = gr.Textbox(value="France", label="Word 4")
    with gr.Row():
        w5 = gr.Textbox(value="Apple", label="Word 5")
        w6 = gr.Textbox(value="Banana", label="Word 6")
        w7 = gr.Textbox(value="King", label="Word 7")
        w8 = gr.Textbox(value="Queen", label="Word 8")
    run_btn = gr.Button("Explore Embeddings", variant="primary")
    scatter = gr.Plot(label="2D Meaning Map")
    heatmap = gr.Plot(label="Similarity Matrix")
    analysis = gr.Markdown()
    run_btn.click(explore_embeddings, [w1, w2, w3, w4, w5, w6, w7, w8], [scatter, heatmap, analysis])
    # Run once on load so visitors see results without clicking.
    demo.load(explore_embeddings, [w1, w2, w3, w4, w5, w6, w7, w8], [scatter, heatmap, analysis])
    gr.Markdown(
        "---\n"
        "**PM Takeaway:** Words that cluster together will be retrieved together in search. "
        "If your domain has jargon with different meanings than everyday usage, RAG might "
        "retrieve the wrong content.\n\n"
        "*AI for Product Managers*"
    )

if __name__ == "__main__":
    demo.launch()