| import streamlit as st |
| import matplotlib.pyplot as plt |
| import numpy as np |
| from mpl_toolkits.mplot3d import Axes3D |
| import umap |
| import pandas as pd |
| from word2vec import * |
| from sklearn.preprocessing import StandardScaler |
| import plotly.express as px |
| from sklearn.manifold import TSNE |
|
|
|
|
def make_3d_plot_tSNE(vectors_list, target_word, time_slice_model):
    """
    Build a 3D Plotly scatter plot of the nearest neighbours of a target word.

    The 3D coordinates are not computed here: they are read from the pickled
    file ``./3d_models/{time_slice_model}.model``, which is unpacked as
    ``(word, vector)`` pairs (presumably the output of an earlier t-SNE
    reduction -- confirm against whatever writes those files).

    Args:
        vectors_list: Iterable of 4-tuples laid out as
            ``(word, model_name, vector, cosine_sim)``. Only the word and the
            cosine similarity are used; the 3D position comes from the
            pickled file instead.
        target_word: Word shown in the plot title.
        time_slice_model: Name used to build the path of the pickled
            3D-coordinates file.

    Returns:
        A ``(fig, df)`` tuple: the Plotly figure and the DataFrame behind it,
        with columns ``word``, ``3d_vector`` and ``cosine_sim``, sorted by
        ``cosine_sim`` in descending order.

    Raises:
        FileNotFoundError: If the pickled coordinates file does not exist.
        KeyError: If a word in ``vectors_list`` has no entry in that file.
    """
    # Imported locally so the function does not depend on ``pickle`` leaking
    # into the module namespace through ``from word2vec import *``.
    import pickle

    # NOTE(review): pickle.load executes arbitrary code from the file, so this
    # path must only ever point at trusted, locally generated files.
    with open(f'./3d_models/{time_slice_model}.model', 'rb') as f:
        pickled_pairs = pickle.load(f)

    # Map every word of the time slice to its precomputed 3D coordinates.
    coords_by_word = {name: coords for name, coords in pickled_pairs}

    # Keep only the requested neighbours, pairing each with its 3D position.
    rows = [
        (name, coords_by_word[name], cosine_sim)
        for name, _, _, cosine_sim in vectors_list
    ]

    df = pd.DataFrame(rows, columns=['word', '3d_vector', 'cosine_sim'])
    df = df.sort_values(by='cosine_sim', ascending=False)

    # Split the coordinate triples into one series per axis for Plotly.
    x = df['3d_vector'].apply(lambda v: v[0])
    y = df['3d_vector'].apply(lambda v: v[1])
    z = df['3d_vector'].apply(lambda v: v[2])

    fig = px.scatter_3d(
        df,
        x=x,
        y=y,
        z=z,
        text='word',
        color='cosine_sim',
        color_continuous_scale='Reds',
    )
    fig.update_traces(marker=dict(size=5))
    fig.update_layout(title=f'3D plot of nearest neighbours to {target_word}')

    return fig, df
|
|