Spaces:
Build error
Build error
| import gensim.downloader | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from sklearn.decomposition import PCA | |
| from sklearn.manifold import TSNE | |
# Path the plotting code saves its PNG to; the same file is reused for every
# request.
cache = "/home/user/app/d.png"

# Pretrained word vectors fetched through gensim's downloader.
# ("glove-wiki-gigaword-50" was noted as an alternative model name.)
model = gensim.downloader.load("word2vec-google-news-300")
| # Function to reduce dimensions | |
def reduce_dimensions(data, method='PCA'):
    """Project *data* onto two components with PCA or t-SNE.

    Args:
        data: Samples to reduce, passed straight to the estimator's
            ``fit_transform``.
        method: ``'PCA'`` (default) or ``'TSNE'``.

    Returns:
        Whatever ``fit_transform`` of the selected estimator returns for
        ``n_components=2``.

    Raises:
        ValueError: If *method* is not ``'PCA'`` or ``'TSNE'``.
    """
    # The estimator is deliberately not called ``model``: that name belongs to
    # the module-level word-vector model and shadowing it here is confusing.
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        # NOTE(review): scikit-learn requires perplexity < number of samples,
        # so this branch presumably needs at least 5 rows -- confirm.
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=4)
    else:
        # Previously an unknown method fell through and crashed with an
        # UnboundLocalError on the return line.
        raise ValueError(
            f"Unsupported reduction method {method!r}; expected 'PCA' or 'TSNE'."
        )
    return reducer.fit_transform(data)
| # Plotting function | |
def plot_reduced_data(reduced_data, labels, title):
    """Scatter-plot 2-D points with labels and two arrows, saving to ``cache``.

    Two green arrows are drawn: one from row 0 to row 1, and one from row 2
    to the last row.

    Args:
        reduced_data: 2-D array indexed as ``reduced_data[i, 0]`` /
            ``reduced_data[i, 1]``; rows 0, 1, 2 and -1 must exist.
        labels: Iterable of strings, one per row, in row order.
        title: Title placed on the plot.

    Returns:
        None. The image is written to the module-level ``cache`` path.
    """
    # Work on explicit Figure/Axes objects instead of pyplot's implicit
    # "current figure", and always close the figure afterwards: otherwise
    # every call leaves one more open figure alive in the process.
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        ax.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
        for i, label in enumerate(labels):
            # Leading space keeps the text from touching its marker.
            ax.annotate(" " + label, (reduced_data[i, 0], reduced_data[i, 1]), fontsize=18)
        ax.set_title(title)

        arrow_style = dict(arrowstyle="->", color='green', lw=3)

        # Arrow 1: from row 0 to row 1.
        start_point = (reduced_data[0, 0], reduced_data[0, 1])
        end_point = (reduced_data[1, 0], reduced_data[1, 1])
        ax.annotate('', xy=end_point, xytext=start_point, arrowprops=arrow_style)

        # Arrow 2: from row 2 to the last row.
        start_point = (reduced_data[2, 0], reduced_data[2, 1])
        end_point = (reduced_data[-1, 0], reduced_data[-1, 1])
        ax.annotate('', xy=end_point, xytext=start_point, arrowprops=arrow_style)

        ax.set_xlabel('Component 1')
        ax.set_ylabel('Component 2')
        ax.grid(True)
        fig.savefig(cache)
    finally:
        plt.close(fig)
# Markdown text handed to gr.Interface as its ``description``.
description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina
The output is Word3 + (Word2 - Word1)
Credits:
* Gensim
* Word2Vec
"""

# One free-text input per word of the analogy.
Word1, Word2, Word3 = gr.Textbox(), gr.Textbox(), gr.Textbox()

# Label component titled "Word4".
label = gr.Label(show_label=True, label="Word4")

# Image component used to display the saved plot.
sp = gr.Image()
def inference(word1, word2, word3):
    """Compute ``word3 + (word2 - word1)`` and plot it among its neighbours.

    The three input words, up to six nearest neighbours of the resulting
    vector, and the resulting vector itself are projected to 2-D with PCA and
    drawn by ``plot_reduced_data``.

    Args:
        word1: Word whose vector is subtracted.
        word2: Word whose vector is added.
        word3: Word the offset ``word2 - word1`` is applied to.

    Returns:
        The module-level ``cache`` path of the saved plot image.

    Raises:
        gr.Error: If any input word is missing from the model vocabulary
            (including empty input), so the UI shows a readable message
            instead of a raw ``KeyError``.
    """
    queries = [word.strip() for word in (word1, word2, word3)]

    vectors = []
    for word in queries:
        try:
            vectors.append(model[word])
        except KeyError:
            raise gr.Error(f"'{word}' is not in the model vocabulary.") from None

    vec1, vec2, vec3 = vectors
    transform = vec3 + vec2 - vec1
    output = model.similar_by_vector(transform)
    print(output)

    # Keep labels and vectors as parallel lists instead of a dict keyed by
    # word: the plot draws arrows between rows 0, 1, 2 and -1, and a dict
    # would silently merge repeated input words and shift those rows.
    labels = list(queries)
    for neighbour, _score in output[:6]:
        # A neighbour that is already shown (often one of the inputs) is
        # not plotted a second time.
        if neighbour not in labels:
            labels.append(neighbour)
            vectors.append(model[neighbour])

    # The computed vector goes last; arrow 2 points from word3 to it.
    word1, word2, word3 = queries
    labels.append(word3 + " + (" + word2 + " - " + word1 + ")")
    vectors.append(transform)

    data = np.stack(vectors)
    reduced_data_pca = reduce_dimensions(data, method='PCA')
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    return cache
# Example rows in (word1, word2, word3) order; only the third word varies.
examples = [
    ["woman", "man", third_word]
    for third_word in ("girl", "granddaughter", "aunt")
]

# Wire the three text inputs to `inference` and show its result in the image
# component.
iface = gr.Interface(
    fn=inference,
    inputs=[Word1, Word2, Word3],
    outputs=sp,
    examples=examples,
    description=description,
)

# Start the Gradio app.
iface.launch()