"""Streamlit app that plots Word2Vec embeddings of user-supplied words with t-SNE.

The user enters a comma-separated list of words; the words known to the
pre-trained model are projected to 2D (Matplotlib) and 3D (Plotly).
"""

import gensim.downloader as api
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from sklearn.manifold import TSNE

# Preferred t-SNE perplexity. It is lowered automatically for short word
# lists because scikit-learn requires perplexity < number of samples.
MAX_PERPLEXITY = 5
RANDOM_STATE = 42


# Load pre-trained Word2Vec model
@st.cache_resource
def load_model():
    """Return the pre-trained "word2vec-google-news-300" vectors from gensim.

    Decorated with ``st.cache_resource`` so the (large) model is loaded once
    and shared across Streamlit reruns instead of being reloaded each time.
    """
    return api.load("word2vec-google-news-300")


def reduce_with_tsne(vectors, n_components):
    """Project ``vectors`` down to ``n_components`` dimensions with t-SNE.

    Args:
        vectors: 2-D array with one embedding per row; needs at least 2 rows.
        n_components: Target dimensionality (2 or 3 in this app).

    Returns:
        Array with one row per input vector and ``n_components`` columns.
    """
    n_samples = len(vectors)
    options = {}
    if n_samples < n_components:
        # PCA-based initialisation cannot produce more components than there
        # are samples, so fall back to random initialisation in that case.
        # Otherwise the library default is left untouched.
        options["init"] = "random"
    tsne = TSNE(
        n_components=n_components,
        # Clamp so that 2-5 words do not violate perplexity < n_samples.
        perplexity=min(MAX_PERPLEXITY, n_samples - 1),
        random_state=RANDOM_STATE,
        **options,
    )
    return tsne.fit_transform(vectors)


model = load_model()

# Streamlit UI
st.title("🔍 Word Embedding Visualization")
st.write("Enter words to visualize their embeddings using t-SNE.")

# User input
raw_words = st.text_input(
    "Enter words (comma-separated)", "king, queen, man, woman, dog, cat"
)

# Process input: strip whitespace, drop empty entries and de-duplicate while
# keeping the order in which the user typed the words.
candidates = list(
    dict.fromkeys(
        token.strip() for token in raw_words.split(",") if token.strip()
    )
)
words = [word for word in candidates if word in model]
unknown_words = [word for word in candidates if word not in model]

if unknown_words:
    # Tell the user which words were ignored instead of dropping them silently.
    st.info(f"Skipped words not found in the model: {', '.join(unknown_words)}")

if len(words) < 2:
    st.warning("Please enter at least two valid words from the Word2Vec model.")
else:
    # Get embeddings
    vectors = np.array([model[word] for word in words])

    # Reduce to 2D using t-SNE
    vectors_2d = reduce_with_tsne(vectors, n_components=2)

    # Plot using Matplotlib
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1])
    for (x, y), word in zip(vectors_2d, words):
        # Small offset so the label does not sit on top of its marker.
        ax.text(x + 0.01, y + 0.01, word, fontsize=12)
    st.pyplot(fig)
    # Release the figure; otherwise pyplot keeps one alive per script rerun.
    plt.close(fig)

    # 3D Visualization with Plotly
    vectors_3d = reduce_with_tsne(vectors, n_components=3)
    df = pd.DataFrame(vectors_3d, columns=["x", "y", "z"])
    df["word"] = words
    fig3d = px.scatter_3d(
        df, x="x", y="y", z="z", text="word", title="3D t-SNE Word Embeddings"
    )
    st.plotly_chart(fig3d)