|
|
import streamlit as st |
|
|
import gensim.downloader as api |
|
|
import numpy as np |
|
|
import matplotlib.pyplot as plt |
|
|
from sklearn.manifold import TSNE |
|
|
import pandas as pd |
|
|
import plotly.express as px |
|
|
|
|
|
|
|
|
@st.cache_resource
def load_model(name: str = "word2vec-google-news-300"):
    """Load a pretrained embedding model through gensim's downloader.

    The function is wrapped in ``st.cache_resource`` so the (large) model
    is loaded once per distinct ``name`` and reused across Streamlit
    reruns instead of being reloaded on every widget interaction.

    Args:
        name: Identifier passed to ``gensim.downloader.load``. Defaults to
            the Google News word2vec vectors, so existing zero-argument
            calls keep their behaviour.

    Returns:
        Whatever ``gensim.downloader.load`` returns for ``name``; the rest
        of this script uses it for ``word in model`` membership tests and
        ``model[word]`` vector lookups.
    """
    return api.load(name)
|
|
|
|
|
# Fetch the embedding model (served from Streamlit's resource cache after
# the first run).
model = load_model()

# Page header.
# NOTE(review): the first character of the title looks like a mis-decoded
# emoji; confirm the intended glyph before changing it.
st.title("๐ Word Embedding Visualization")
st.write("Enter words to visualize their embeddings using t-SNE.")

# Free-text input, pre-filled with a small example vocabulary.
raw_text = st.text_input(
    "Enter words (comma-separated)",
    "king, queen, man, woman, dog, cat",
)

# Split on commas, trim surrounding whitespace, and keep only the tokens
# the model has a vector for; anything else is dropped silently.
trimmed_tokens = (token.strip() for token in raw_text.split(","))
words = [token for token in trimmed_tokens if token in model]
|
|
|
|
|
# Drop repeated words while keeping first-seen order: duplicates would put
# identical points into t-SNE and should not count towards the two-word
# minimum checked below.
unique_words = list(dict.fromkeys(words))

if len(unique_words) < 2:
    st.warning("Please enter at least two valid words from the Word2Vec model.")
else:
    # One row per word, each row being that word's embedding vector.
    vectors = np.array([model[word] for word in unique_words])

    # scikit-learn rejects perplexity >= n_samples with a ValueError, so the
    # previous fixed value of 5 crashed for 2-5 words. Clamp it to the
    # largest value that is valid for the current number of words.
    perplexity = min(5, len(unique_words) - 1)

    # --- 2D projection, drawn with Matplotlib ---
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
    vectors_2d = tsne.fit_transform(vectors)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1])

    # Label each point, nudged slightly so the text does not sit on the marker.
    for (x_pos, y_pos), word in zip(vectors_2d, unique_words):
        ax.text(x_pos + 0.01, y_pos + 0.01, word, fontsize=12)

    st.pyplot(fig)
    # Streamlit reruns this script on every interaction; without closing,
    # pyplot keeps every figure alive and memory grows with each rerun.
    plt.close(fig)

    # --- 3D projection, drawn with Plotly ---
    tsne_3d = TSNE(n_components=3, perplexity=perplexity, random_state=42)
    vectors_3d = tsne_3d.fit_transform(vectors)

    df = pd.DataFrame(vectors_3d, columns=["x", "y", "z"])
    df["word"] = unique_words

    fig3d = px.scatter_3d(
        df,
        x="x",
        y="y",
        z="z",
        text="word",
        title="3D t-SNE Word Embeddings",
    )
    st.plotly_chart(fig3d)
|
|
|