# embedVis / app.py
# Author: amasood
# Commit: "Create app.py" (cbf96f3, verified)
import streamlit as st
import gensim.downloader as api
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import pandas as pd
import plotly.express as px
# Load pre-trained Word2Vec model
@st.cache_resource
def load_model():
    """Return the pre-trained Google News Word2Vec vectors.

    The vectors are fetched through gensim's downloader API. The function is
    wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded object
    instead of calling the downloader again on every script rerun.
    """
    model_name = "word2vec-google-news-300"
    return api.load(model_name)
def _project_with_tsne(vectors, n_components):
    """Reduce ``vectors`` to ``n_components`` dimensions with t-SNE.

    scikit-learn requires the perplexity to be strictly smaller than the
    number of samples, so the preferred value of 5 is clamped to
    ``n_samples - 1``. Without the clamp, entering between two and five
    valid words raises ``ValueError`` inside ``fit_transform``.
    """
    perplexity = min(5, len(vectors) - 1)
    reducer = TSNE(
        n_components=n_components, perplexity=perplexity, random_state=42
    )
    return reducer.fit_transform(vectors)


model = load_model()

# Streamlit UI
# NOTE: the leading emoji was mojibake in the original literal; restored
# here as the magnifying-glass glyph.
st.title("🔍 Word Embedding Visualization")
st.write("Enter words to visualize their embeddings using t-SNE.")

# User input
raw_words = st.text_input(
    "Enter words (comma-separated)", "king, queen, man, woman, dog, cat"
)

# Keep only words known to the model. dict.fromkeys drops repeated entries
# while preserving input order, so duplicates neither count toward the
# two-word minimum nor produce overlapping points in the plots.
candidates = (token.strip() for token in raw_words.split(","))
words = list(dict.fromkeys(word for word in candidates if word in model))

if len(words) < 2:
    st.warning("Please enter at least two valid words from the Word2Vec model.")
else:
    # One embedding row per retained word, in the same order as ``words``.
    vectors = np.array([model[word] for word in words])

    # 2D projection rendered with Matplotlib.
    vectors_2d = _project_with_tsne(vectors, n_components=2)
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1])
    for (x, y), word in zip(vectors_2d, words):
        # Small offset so the label does not sit directly on its marker.
        ax.text(x + 0.01, y + 0.01, word, fontsize=12)
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; release
    # it explicitly so repeated reruns do not accumulate open figures.
    plt.close(fig)

    # 3D projection rendered with Plotly.
    vectors_3d = _project_with_tsne(vectors, n_components=3)
    df = pd.DataFrame(vectors_3d, columns=["x", "y", "z"])
    df["word"] = words
    fig3d = px.scatter_3d(
        df, x="x", y="y", z="z", text="word", title="3D t-SNE Word Embeddings"
    )
    st.plotly_chart(fig3d)