"""Gradio demo: find the 10 words most similar to an input word (word2vec)
and plot them in 2-D after dimensionality reduction (PCA or t-SNE)."""

import gensim.downloader
import gradio as gr
import pandas as pd  # noqa: F401 -- kept for parity with the original script
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Pre-trained embedding model (~1.6 GB download on first use).
# Swap for "glove-wiki-gigaword-50" for a much smaller/faster model.
model = gensim.downloader.load("word2vec-google-news-300")

# Path the rendered scatter plot is written to; returned to Gradio as the
# output image.
cache = "/home/user/app/d.png"


def reduce_dimensions(data, method='PCA'):
    """Project `data` (n_samples x n_features) down to 2 components.

    Args:
        data: 2-D numpy array of embedding vectors.
        method: 'PCA' or 'TSNE'.

    Returns:
        (n_samples, 2) numpy array.

    Raises:
        ValueError: if `method` is not a supported name.
    """
    # Local name is deliberately NOT `model`, to avoid shadowing the global
    # word2vec model.
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        # perplexity must be < n_samples; 4 is safe for the 11 points
        # plotted by this app.
        reducer = TSNE(n_components=2, learning_rate='auto',
                       init='random', perplexity=4)
    else:
        # The original code fell through to an UnboundLocalError here;
        # fail loudly with a clear message instead.
        raise ValueError(f"Unknown reduction method: {method!r}")
    return reducer.fit_transform(data)


def plot_reduced_data(reduced_data, labels, title):
    """Scatter-plot the 2-D `reduced_data`, annotate each point with its
    label, and save the figure to `cache`."""
    fig = plt.figure(figsize=(10, 8))
    plt.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
    for i, label in enumerate(labels):
        # Leading space nudges the text slightly right of the marker.
        plt.annotate(" " + label,
                     (reduced_data[i, 0], reduced_data[i, 1]),
                     fontsize=18)
    plt.title(title)
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    plt.grid(True)
    plt.savefig(cache)  # , dpi=300)
    # Close the figure so repeated requests don't accumulate open figures
    # (matplotlib keeps every figure alive until explicitly closed).
    plt.close(fig)


description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina

The output is the 10 words most correlated with Word 1.

All words must be entered in small caps.

Credits:
* Gensim
* Word2Vec
"""

# Gradio UI components.
Word1 = gr.Textbox()
sp = gr.Image()


def inference(word1):
    """Find the 10 nearest neighbours of `word1`, project the 11 vectors
    with PCA, plot them, and return the path of the saved figure.

    Raises:
        ValueError: if `word1` is not in the model vocabulary.
    """
    # Fail with a readable message instead of gensim's internal KeyError.
    if word1 not in model:
        raise ValueError(
            f"'{word1}' is not in the model vocabulary "
            "(words must be entered in small caps)."
        )
    # List of (word, similarity) pairs for the 10 nearest neighbours.
    neighbours = model.similar_by_vector(word1)
    word_list = [word1] + [word for word, _ in neighbours]
    # Stack the embedding vectors into an (n_words, dim) matrix.
    vectors = np.stack([model[word] for word in word_list])
    reduced_data_pca = reduce_dimensions(vectors, method='PCA')
    plot_reduced_data(reduced_data_pca, word_list, 'PCA Results')
    return cache


examples = [
    ["liver"],
    ["brainstem"],
    ["lungs"],
]

iface = gr.Interface(
    fn=inference,
    inputs=[Word1],
    outputs=sp,
    description=description,
    examples=examples,
)

if __name__ == "__main__":
    iface.launch()