Spaces:

amasood
/

embedVis

Build error

embedVis / app.py

Create app.py

cbf96f3 verified 11 months ago

1.61 kB

	import streamlit as st
	import gensim.downloader as api
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.manifold import TSNE
	import pandas as pd
	import plotly.express as px

	# Load pre-trained Word2Vec model
	@st.cache_resource
	def load_model():
	    """Fetch the pre-trained Google News Word2Vec vectors via gensim.

	    Wrapped in ``st.cache_resource`` so the loaded object is reused
	    instead of being fetched again on every call.
	    """
	    model_name = "word2vec-google-news-300"
	    return api.load(model_name)

	# Upper bound for the t-SNE perplexity; the value actually used is lowered
	# for small inputs (see _project_with_tsne).
	MAX_PERPLEXITY = 5


	def _project_with_tsne(vectors, n_components):
	    """Project ``vectors`` down to ``n_components`` dimensions with t-SNE.

	    Args:
	        vectors: 2-D array with one embedding per row (at least two rows).
	        n_components: Target dimensionality of the projection.

	    Returns:
	        Array with one row per input vector and ``n_components`` columns.
	    """
	    n_samples = len(vectors)
	    # scikit-learn rejects perplexity >= n_samples, so cap it for short
	    # word lists; with 6+ words this is still the original value of 5.
	    tsne_kwargs = {
	        "n_components": n_components,
	        "perplexity": min(MAX_PERPLEXITY, n_samples - 1),
	        "random_state": 42,
	    }
	    # PCA initialisation cannot produce more components than there are
	    # samples, so fall back to random init only in that case and otherwise
	    # leave the library default untouched.
	    if n_samples < n_components:
	        tsne_kwargs["init"] = "random"
	    return TSNE(**tsne_kwargs).fit_transform(vectors)


	model = load_model()

	# Streamlit UI
	st.title("🔍 Word Embedding Visualization")
	st.write("Enter words to visualize their embeddings using t-SNE.")

	# User input
	words = st.text_input("Enter words (comma-separated)", "king, queen, man, woman, dog, cat")

	# Process input: trim whitespace, drop tokens missing from the model, and
	# remove duplicates (order preserved) so repeated words do not feed
	# identical points into t-SNE or count towards the minimum of two words.
	candidates = (token.strip() for token in words.split(","))
	words = list(dict.fromkeys(token for token in candidates if token in model))

	if len(words) < 2:
	    st.warning("Please enter at least two valid words from the Word2Vec model.")
	else:
	    # Get embeddings
	    vectors = np.array([model[word] for word in words])

	    # Reduce to 2D using t-SNE
	    vectors_2d = _project_with_tsne(vectors, 2)

	    # Plot using Matplotlib
	    fig, ax = plt.subplots(figsize=(8, 6))
	    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1])

	    # Label each point, nudged slightly so the text does not sit on the marker.
	    for i, word in enumerate(words):
	        ax.text(vectors_2d[i, 0] + 0.01, vectors_2d[i, 1] + 0.01, word, fontsize=12)

	    st.pyplot(fig)
	    # pyplot keeps every figure it creates alive until it is closed; the
	    # script reruns on each interaction, so release it explicitly.
	    plt.close(fig)

	    # 3D Visualization with Plotly
	    vectors_3d = _project_with_tsne(vectors, 3)

	    df = pd.DataFrame(vectors_3d, columns=["x", "y", "z"])
	    df["word"] = words

	    fig3d = px.scatter_3d(df, x="x", y="y", z="z", text="word", title="3D t-SNE Word Embeddings")
	    st.plotly_chart(fig3d)