File size: 1,608 Bytes
cbf96f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import streamlit as st
import gensim.downloader as api
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import pandas as pd
import plotly.express as px

# Pre-trained Word2Vec embeddings, wrapped in Streamlit's resource cache
@st.cache_resource
def load_model():
    """Return the "word2vec-google-news-300" vectors via gensim's downloader.

    The function is decorated with ``st.cache_resource`` so that Streamlit
    can reuse the loaded object instead of calling ``api.load`` again on
    each script rerun.
    """
    model_name = "word2vec-google-news-300"
    return api.load(model_name)

model = load_model()

# Streamlit UI
st.title("🔍 Word Embedding Visualization")
st.write("Enter words to visualize their embeddings using t-SNE.")

# User input
words = st.text_input("Enter words (comma-separated)", "king, queen, man, woman, dog, cat")

# Process input: strip each comma-separated entry once, then keep only the
# entries that are present in the model's vocabulary.
candidates = [entry.strip() for entry in words.split(",")]
words = [word for word in candidates if word in model]

if len(words) < 2:
    st.warning("Please enter at least two valid words from the Word2Vec model.")
else:
    # Get embeddings (one row per word, in input order)
    vectors = np.array([model[word] for word in words])

    # scikit-learn's TSNE requires perplexity < n_samples. A fixed value of 5
    # therefore fails for 2-5 words, even though the UI accepts them, so cap
    # it just below the sample count. Six or more words still use 5.
    perplexity = min(5, len(words) - 1)

    # Reduce to 2D using t-SNE
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
    vectors_2d = tsne.fit_transform(vectors)

    # Plot using Matplotlib
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1])

    # Label each point with its word, nudged slightly off the marker
    for i, word in enumerate(words):
        ax.text(vectors_2d[i, 0] + 0.01, vectors_2d[i, 1] + 0.01, word, fontsize=12)

    st.pyplot(fig)
    # The script reruns on every interaction; close the figure so pyplot
    # does not keep accumulating open figures across reruns.
    plt.close(fig)

    # 3D Visualization with Plotly
    tsne_3d = TSNE(n_components=3, perplexity=perplexity, random_state=42)
    vectors_3d = tsne_3d.fit_transform(vectors)

    df = pd.DataFrame(vectors_3d, columns=["x", "y", "z"])
    df["word"] = words

    fig3d = px.scatter_3d(df, x="x", y="y", z="z", text="word", title="3D t-SNE Word Embeddings")
    st.plotly_chart(fig3d)