# Spaces: amasood / embedVis
# Build error
# File size: 1,608 Bytes
# cbf96f3

import streamlit as st
import gensim.downloader as api
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import pandas as pd
import plotly.express as px

# Load pre-trained Word2Vec model
@st.cache_resource
def load_model():
    """Load the pre-trained ``word2vec-google-news-300`` vectors.

    The vectors are fetched through gensim's downloader API. The function is
    wrapped in ``st.cache_resource`` so that Streamlit can reuse the loaded
    object instead of calling the downloader again on every script rerun.

    Returns:
        Whatever ``gensim.downloader.load`` returns for this model name.
    """
    pretrained_name = "word2vec-google-news-300"
    return api.load(pretrained_name)

model = load_model()

# t-SNE settings shared by the 2D and 3D projections.
TSNE_RANDOM_STATE = 42  # fixed seed so a given word list always yields the same layout
TSNE_MAX_PERPLEXITY = 5  # preferred perplexity; lowered automatically for short word lists


def _split_words(raw_text):
    """Return the unique, non-empty tokens of a comma-separated string.

    Tokens are stripped of surrounding whitespace and returned in first-seen
    order, so a word typed twice is only embedded and plotted once.
    """
    stripped = (token.strip() for token in raw_text.split(","))
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(token for token in stripped if token))


def _tsne_perplexity(n_samples):
    """Return a perplexity that is valid for ``n_samples`` points.

    scikit-learn requires ``perplexity < n_samples``; a fixed value of 5
    therefore fails for word lists with fewer than six entries.
    """
    return max(1, min(TSNE_MAX_PERPLEXITY, n_samples - 1))


def _project(vectors, n_components):
    """Reduce ``vectors`` to ``n_components`` dimensions with t-SNE."""
    n_samples = len(vectors)
    options = {}
    if n_samples < n_components:
        # PCA-based initialisation (the default in recent scikit-learn
        # releases) cannot produce more components than there are samples,
        # so fall back to random initialisation for very short word lists.
        options["init"] = "random"
    reducer = TSNE(
        n_components=n_components,
        perplexity=_tsne_perplexity(n_samples),
        random_state=TSNE_RANDOM_STATE,
        **options,
    )
    return reducer.fit_transform(vectors)


# Streamlit UI
st.title("🔍 Word Embedding Visualization")
st.write("Enter words to visualize their embeddings using t-SNE.")

# User input (kept under its own name so `words` is always a list)
raw_words = st.text_input("Enter words (comma-separated)", "king, queen, man, woman, dog, cat")

# Process input: keep only words the model has a vector for
requested_words = _split_words(raw_words)
words = [word for word in requested_words if word in model]
unknown_words = [word for word in requested_words if word not in model]

if unknown_words:
    # Tell the user what was dropped instead of discarding it silently.
    st.info("Ignored words not found in the Word2Vec model: " + ", ".join(unknown_words))

if len(words) < 2:
    st.warning("Please enter at least two valid words from the Word2Vec model.")
else:
    # Get embeddings: one row per word, in the same order as `words`
    vectors = np.array([model[word] for word in words])

    # Reduce to 2D using t-SNE
    vectors_2d = _project(vectors, 2)

    # Plot using Matplotlib
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1])

    # Label each point, nudged slightly so the text does not sit on the marker
    for (x_pos, y_pos), word in zip(vectors_2d, words):
        ax.text(x_pos + 0.01, y_pos + 0.01, word, fontsize=12)

    st.pyplot(fig)
    # pyplot keeps every figure it creates until it is closed; without this
    # each Streamlit rerun would leave another figure in memory.
    plt.close(fig)

    # 3D Visualization with Plotly
    vectors_3d = _project(vectors, 3)

    df = pd.DataFrame(vectors_3d, columns=["x", "y", "z"])
    df["word"] = words

    fig3d = px.scatter_3d(df, x="x", y="y", z="z", text="word", title="3D t-SNE Word Embeddings")
    st.plotly_chart(fig3d)