Commented out unused part in plots.py
Browse files
plots.py
CHANGED
|
@@ -8,87 +8,87 @@ from word2vec import *
|
|
| 8 |
from sklearn.preprocessing import StandardScaler
|
| 9 |
|
| 10 |
|
| 11 |
-
def make_3d_plot(new_3d_vectors):
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
|
| 19 |
-
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
|
| 40 |
-
|
| 41 |
|
| 42 |
|
| 43 |
-
import plotly.express as px
|
| 44 |
|
| 45 |
|
| 46 |
-
def make_3d_plot2(df):
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
|
| 55 |
|
| 56 |
-
def make_3d_plot3(vectors_list, word, time_slice_model):
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
|
| 91 |
-
|
| 92 |
|
| 93 |
|
| 94 |
|
|
|
|
| 8 |
from sklearn.preprocessing import StandardScaler
|
| 9 |
|
| 10 |
|
| 11 |
+
# def make_3d_plot(new_3d_vectors):
|
| 12 |
+
# """
|
| 13 |
+
# Turn DataFrame of 3D vectors into a 3D plot
|
| 14 |
+
# DataFrame structure: ['word', 'cosine_sim', '3d_vector']
|
| 15 |
+
# """
|
| 16 |
+
# fig = plt.figure()
|
| 17 |
+
# ax = fig.add_subplot(projection='3d')
|
| 18 |
|
| 19 |
+
# plt.ion()
|
| 20 |
|
| 21 |
+
# # Unpack vectors and labels from DataFrame
|
| 22 |
+
# labels = new_3d_vectors['word']
|
| 23 |
+
# x = new_3d_vectors['3d_vector'].apply(lambda v: v[0])
|
| 24 |
+
# y = new_3d_vectors['3d_vector'].apply(lambda v: v[1])
|
| 25 |
+
# z = new_3d_vectors['3d_vector'].apply(lambda v: v[2])
|
| 26 |
|
| 27 |
+
# # Plot points
|
| 28 |
+
# ax.scatter(x, y, z)
|
| 29 |
|
| 30 |
+
# # Add labels
|
| 31 |
+
# for i, label in enumerate(labels):
|
| 32 |
+
# ax.text(x[i], y[i], z[i], label)
|
| 33 |
|
| 34 |
+
# # Set labels and title
|
| 35 |
+
# ax.set_xlabel('X')
|
| 36 |
+
# ax.set_ylabel('Y')
|
| 37 |
+
# ax.set_zlabel('Z')
|
| 38 |
+
# ax.set_title('3D plot of word vectors')
|
| 39 |
|
| 40 |
+
# return fig
|
| 41 |
|
| 42 |
|
| 43 |
+
# import plotly.express as px
|
| 44 |
|
| 45 |
|
| 46 |
+
# def make_3d_plot2(df):
|
| 47 |
+
# """
|
| 48 |
+
# Turn DataFrame of 3D vectors into a 3D plot using plotly
|
| 49 |
+
# DataFrame structure: ['word', 'cosine_sim', '3d_vector']
|
| 50 |
+
# """
|
| 51 |
+
# vectors = df['3d_vector'].tolist()
|
| 52 |
+
# fig = px.scatter_3d(df, x=[v[0] for v in vectors], y=[v[1] for v in vectors], z=[v[2] for v in vectors], text=df['word'])
|
| 53 |
+
# return fig
|
| 54 |
|
| 55 |
|
| 56 |
+
# def make_3d_plot3(vectors_list, word, time_slice_model):
|
| 57 |
+
# """
|
| 58 |
+
# Turn list of 100D vectors into a 3D plot using UMAP and Plotly.
|
| 59 |
+
# List structure: [(word, model_name, vector, cosine_sim)]
|
| 60 |
+
# """
|
| 61 |
+
# # Load model
|
| 62 |
+
# model = load_word2vec_model(f'models/{time_slice_model}.model')
|
| 63 |
|
| 64 |
+
# # Make UMAP model and fit it to the vectors
|
| 65 |
+
# umap_model = umap.UMAP(n_components=3)
|
| 66 |
+
# umap_model.fit(model.wv.vectors)
|
| 67 |
|
| 68 |
+
# # Transform the vectors to 3D
|
| 69 |
+
# transformed_vectors = umap_model.transform(model.wv.vectors)
|
| 70 |
|
| 71 |
|
| 72 |
+
# # Create DataFrame from the transformed vectors
|
| 73 |
+
# df = pd.DataFrame(transformed_vectors, columns=['x', 'y', 'z'])
|
| 74 |
|
| 75 |
+
# # Add word and cosine similarity to DataFrame
|
| 76 |
+
# df['word'] = model.wv.index_to_key
|
| 77 |
|
| 78 |
+
# # Filter the DataFrame for words in vectors_list and add cosine similarity
|
| 79 |
+
# word_list = [v[0] for v in vectors_list]
|
| 80 |
+
# cosine_sim_list = [v[3] for v in vectors_list]
|
| 81 |
|
| 82 |
+
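# # NOTE(review): the filter below keeps df's existing row order, while cosine_sim_list follows vectors_list order; the assignment assumes both orders and lengths match - TODO confirm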
|
| 83 |
+
# df = df[df['word'].isin(word_list)]
|
| 84 |
+
# df['cosine_sim'] = cosine_sim_list
|
| 85 |
|
| 86 |
+
# # Create plot
|
| 87 |
+
# fig = px.scatter_3d(df, x='x', y='y', z='z', text='word', color='cosine_sim', color_continuous_scale='Reds')
|
| 88 |
+
# fig.update_traces(marker=dict(size=5))
|
| 89 |
+
# fig.update_layout(title=f'3D plot of nearest neighbours to {word}')
|
| 90 |
|
| 91 |
+
# return fig, df
|
| 92 |
|
| 93 |
|
| 94 |
|