File size: 1,608 Bytes
cbf96f3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import streamlit as st
import gensim.downloader as api
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import pandas as pd
import plotly.express as px
# Load pre-trained Word2Vec model
@st.cache_resource
def load_model():
    """Return the pre-trained ``word2vec-google-news-300`` embedding model.

    The model is fetched through ``gensim.downloader``. The function is
    wrapped in ``st.cache_resource`` so that Streamlit script reruns can
    reuse the already-loaded object instead of loading it again.
    """
    model_name = "word2vec-google-news-300"
    return api.load(model_name)
# load_model is wrapped in st.cache_resource, so reruns reuse the loaded model.
model = load_model()

# Streamlit UI
st.title("🔍 Word Embedding Visualization")
st.write("Enter words to visualize their embeddings using t-SNE.")

# User input
raw_words = st.text_input("Enter words (comma-separated)", "king, queen, man, woman, dog, cat")

# Process input: trim whitespace, drop empty entries, and de-duplicate while
# keeping first-seen order. Duplicate words would feed identical points to
# t-SNE and draw overlapping labels.
requested = list(dict.fromkeys(
    token.strip() for token in raw_words.split(",") if token.strip()
))
words = [word for word in requested if word in model]

# Tell the user which entries were dropped instead of discarding them silently.
skipped = [word for word in requested if word not in model]
if skipped:
    st.info("Skipped words not found in the Word2Vec model: " + ", ".join(skipped))

if len(words) < 2:
    st.warning("Please enter at least two valid words from the Word2Vec model.")
else:
    # Get embeddings: one row per word.
    vectors = np.array([model[word] for word in words])

    # scikit-learn requires perplexity < n_samples; clamp the preferred value
    # so that 2-5 valid words no longer raise a ValueError.
    perplexity = min(5, len(words) - 1)

    # Reduce to 2D using t-SNE
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
    vectors_2d = tsne.fit_transform(vectors)

    # Plot using Matplotlib
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1])
    for i, word in enumerate(words):
        # Small offset so the label does not sit directly on its marker.
        ax.text(vectors_2d[i, 0] + 0.01, vectors_2d[i, 1] + 0.01, word, fontsize=12)
    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; release
    # this one so repeated script reruns do not accumulate open figures.
    plt.close(fig)

    # 3D Visualization with Plotly
    tsne_3d = TSNE(n_components=3, perplexity=perplexity, random_state=42)
    vectors_3d = tsne_3d.fit_transform(vectors)
    df = pd.DataFrame(vectors_3d, columns=["x", "y", "z"])
    df["word"] = words
    fig3d = px.scatter_3d(df, x="x", y="y", z="z", text="word", title="3D t-SNE Word Embeddings")
    st.plotly_chart(fig3d)
|